diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java index e3dcc95dd0..427eb2725a 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAffiliation.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.sx.bio.pubmed; /** @@ -7,31 +8,32 @@ package eu.dnetlib.dhp.sx.bio.pubmed; */ public class PMAffiliation { - private String name; + private String name; - private PMIdentifier identifier; + private PMIdentifier identifier; - public PMAffiliation() { + public PMAffiliation() { - } - public PMAffiliation(String name, PMIdentifier identifier) { - this.name = name; - this.identifier = identifier; - } + } - public String getName() { - return name; - } + public PMAffiliation(String name, PMIdentifier identifier) { + this.name = name; + this.identifier = identifier; + } - public void setName(String name) { - this.name = name; - } + public String getName() { + return name; + } - public PMIdentifier getIdentifier() { - return identifier; - } + public void setName(String name) { + this.name = name; + } - public void setIdentifier(PMIdentifier identifier) { - this.identifier = identifier; - } + public PMIdentifier getIdentifier() { + return identifier; + } + + public void setIdentifier(PMIdentifier identifier) { + this.identifier = identifier; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java index b0df256634..e023f2e626 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java @@ -97,5 +97,4 @@ public class PMAuthor implements Serializable { this.affiliation = affiliation; } - } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java index 0c8c55e40e..6cd17a90c0 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMIdentifier.java @@ -1,53 +1,53 @@ + package eu.dnetlib.dhp.sx.bio.pubmed; public class PMIdentifier { - private String pid; - private String type; + private String pid; + private String type; + public PMIdentifier(String pid, String type) { + this.pid = cleanPid(pid); + this.type = type; + } - public PMIdentifier(String pid, String type) { - this.pid = cleanPid(pid); - this.type = type; - } + public PMIdentifier() { - public PMIdentifier() { + } - } + private String cleanPid(String pid) { - private String cleanPid(String pid) { + if (pid == null) { + return null; + } - if (pid == null) { - return null; - } + // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 + if (pid.matches("[0-9]{15}[0-9X]")) { + return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); + } - // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 - if (pid.matches("[0-9]{15}[0-9X]")) { - return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); - } + // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 + if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { + return pid.replaceAll("http://orcid.org/", ""); + } + return pid; + } - // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 - if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { - return pid.replaceAll("http://orcid.org/", ""); - } - return pid; - } + public String getPid() { + return pid; + } - public String getPid() { - return pid; - } + public PMIdentifier setPid(String pid) { + this.pid = cleanPid(pid); + return this; + } - public PMIdentifier setPid(String pid) { - this.pid = cleanPid(pid); - return this; - } + public String getType() { + return type; + } - public String getType() { - return type; - } - - public PMIdentifier setType(String type) { - this.type = type; - return this; - } + public PMIdentifier setType(String type) { + this.type = type; + return this; + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index e4a238c8f3..de68ebb58e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -673,7 +673,6 @@ case object Crossref2Oaf { val doi = input.getString(0) val rorId = input.getString(1) - val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true) val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala index 2eb4bea65c..bc9a2cf02b 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser2.scala @@ -82,21 +82,22 @@ class PMParser2 { a.setLastName((author \ "LastName").text) a.setForeName((author \ "ForeName").text) val id = (author \ "Identifier").text - val idType =(author \ "Identifier" \ "@Source").text + val idType = (author \ "Identifier" \ "@Source").text - if(id != null && id.nonEmpty && idType != null && idType.nonEmpty) { + if (id != null && id.nonEmpty && idType != null && idType.nonEmpty) { a.setIdentifier(new PMIdentifier(id, idType)) } - val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text - val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text + val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text - if(affiliation != null && affiliation.nonEmpty) { + if (affiliation != null && affiliation.nonEmpty) { val aff = new PMAffiliation() aff.setName(affiliation) - if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) { + if ( + affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty + ) { aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType)) } a.setAffiliation(aff) diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala index 5e14c731a6..281ca0e07d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala @@ -294,11 +294,23 @@ object PubMedToOaf { author.setName(a.getForeName) author.setSurname(a.getLastName) author.setFullname(a.getFullName) - if(a.getIdentifier != null) { - author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid, - OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava) + if (a.getIdentifier != null) { + author.setPid( + List( + OafMapperUtils.structuredProperty( + a.getIdentifier.getPid, + OafMapperUtils.qualifier( + a.getIdentifier.getType, + a.getIdentifier.getType, + ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES + ), + dataInfo + ) + ).asJava + ) } - if (a.getAffiliation!= null) + if (a.getAffiliation != null) author.setRawAffiliationString(List(a.getAffiliation.getName).asJava) author.setRank(index + 1) author diff --git a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala index 4a926df015..cb7826dbff 100644 --- a/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala +++ b/dhp-workflows/dhp-aggregation/src/test/scala/eu/dnetlib/dhp/sx/bio/BioScholixTest.scala @@ -63,7 +63,6 @@ class BioScholixTest extends AbstractVocabularyTest { "0000000333457333", "0000000335964515", "0000000302921949", - "http://orcid.org/0000-0001-8567-3543", "http://orcid.org/0000-0001-7868-8528", "0000-0001-9189-1440",