Add Collector Plugin for Zenodo Dumps #516

Merged
claudio.atzori merged 3 commits from zenodo_dump_collection into beta 2024-12-06 13:51:14 +01:00
7 changed files with 82 additions and 70 deletions
Showing only changes of commit 5f134c4045 - Show all commits

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.sx.bio.pubmed; package eu.dnetlib.dhp.sx.bio.pubmed;
/** /**
@ -7,31 +8,32 @@ package eu.dnetlib.dhp.sx.bio.pubmed;
*/ */
public class PMAffiliation { public class PMAffiliation {
private String name; private String name;
private PMIdentifier identifier; private PMIdentifier identifier;
public PMAffiliation() { public PMAffiliation() {
} }
public PMAffiliation(String name, PMIdentifier identifier) {
this.name = name;
this.identifier = identifier;
}
public String getName() { public PMAffiliation(String name, PMIdentifier identifier) {
return name; this.name = name;
} this.identifier = identifier;
}
public void setName(String name) { public String getName() {
this.name = name; return name;
} }
public PMIdentifier getIdentifier() { public void setName(String name) {
return identifier; this.name = name;
} }
public void setIdentifier(PMIdentifier identifier) { public PMIdentifier getIdentifier() {
this.identifier = identifier; return identifier;
} }
public void setIdentifier(PMIdentifier identifier) {
this.identifier = identifier;
}
} }

View File

@ -97,5 +97,4 @@ public class PMAuthor implements Serializable {
this.affiliation = affiliation; this.affiliation = affiliation;
} }
} }

View File

@ -1,53 +1,53 @@
package eu.dnetlib.dhp.sx.bio.pubmed; package eu.dnetlib.dhp.sx.bio.pubmed;
public class PMIdentifier { public class PMIdentifier {
private String pid; private String pid;
private String type; private String type;
public PMIdentifier(String pid, String type) {
this.pid = cleanPid(pid);
this.type = type;
}
public PMIdentifier(String pid, String type) { public PMIdentifier() {
this.pid = cleanPid(pid);
this.type = type;
}
public PMIdentifier() { }
} private String cleanPid(String pid) {
private String cleanPid(String pid) { if (pid == null) {
return null;
}
if (pid == null) { // clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705
return null; if (pid.matches("[0-9]{15}[0-9X]")) {
} return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4");
}
// clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705 // clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543
if (pid.matches("[0-9]{15}[0-9X]")) { if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) {
return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4"); return pid.replaceAll("http://orcid.org/", "");
} }
return pid;
}
// clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543 public String getPid() {
if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) { return pid;
return pid.replaceAll("http://orcid.org/", ""); }
}
return pid;
}
public String getPid() { public PMIdentifier setPid(String pid) {
return pid; this.pid = cleanPid(pid);
} return this;
}
public PMIdentifier setPid(String pid) { public String getType() {
this.pid = cleanPid(pid); return type;
return this; }
}
public String getType() { public PMIdentifier setType(String type) {
return type; this.type = type;
} return this;
}
public PMIdentifier setType(String type) {
this.type = type;
return this;
}
} }

View File

@ -673,7 +673,6 @@ case object Crossref2Oaf {
val doi = input.getString(0) val doi = input.getString(0)
val rorId = input.getString(1) val rorId = input.getString(1)
val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true) val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true)
val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId) val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId)

View File

@ -82,21 +82,22 @@ class PMParser2 {
a.setLastName((author \ "LastName").text) a.setLastName((author \ "LastName").text)
a.setForeName((author \ "ForeName").text) a.setForeName((author \ "ForeName").text)
val id = (author \ "Identifier").text val id = (author \ "Identifier").text
val idType =(author \ "Identifier" \ "@Source").text val idType = (author \ "Identifier" \ "@Source").text
if(id != null && id.nonEmpty && idType != null && idType.nonEmpty) { if (id != null && id.nonEmpty && idType != null && idType.nonEmpty) {
a.setIdentifier(new PMIdentifier(id, idType)) a.setIdentifier(new PMIdentifier(id, idType))
} }
val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text
val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text
val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text
if(affiliation != null && affiliation.nonEmpty) { if (affiliation != null && affiliation.nonEmpty) {
val aff = new PMAffiliation() val aff = new PMAffiliation()
aff.setName(affiliation) aff.setName(affiliation)
if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) { if (
affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty
) {
aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType)) aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType))
} }
a.setAffiliation(aff) a.setAffiliation(aff)

View File

@ -294,11 +294,23 @@ object PubMedToOaf {
author.setName(a.getForeName) author.setName(a.getForeName)
author.setSurname(a.getLastName) author.setSurname(a.getLastName)
author.setFullname(a.getFullName) author.setFullname(a.getFullName)
if(a.getIdentifier != null) { if (a.getIdentifier != null) {
author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid, author.setPid(
OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava) List(
OafMapperUtils.structuredProperty(
a.getIdentifier.getPid,
OafMapperUtils.qualifier(
a.getIdentifier.getType,
a.getIdentifier.getType,
ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES
),
dataInfo
)
).asJava
)
} }
if (a.getAffiliation!= null) if (a.getAffiliation != null)
author.setRawAffiliationString(List(a.getAffiliation.getName).asJava) author.setRawAffiliationString(List(a.getAffiliation.getName).asJava)
author.setRank(index + 1) author.setRank(index + 1)
author author

View File

@ -63,7 +63,6 @@ class BioScholixTest extends AbstractVocabularyTest {
"0000000333457333", "0000000333457333",
"0000000335964515", "0000000335964515",
"0000000302921949", "0000000302921949",
"http://orcid.org/0000-0001-8567-3543", "http://orcid.org/0000-0001-8567-3543",
"http://orcid.org/0000-0001-7868-8528", "http://orcid.org/0000-0001-7868-8528",
"0000-0001-9189-1440", "0000-0001-9189-1440",