Add Collector Plugin for Zenodo Dumps #516

Merged
claudio.atzori merged 3 commits from zenodo_dump_collection into beta 2024-12-06 13:51:14 +01:00
7 changed files with 82 additions and 70 deletions
Showing only changes of commit 5f134c4045 - Show all commits

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.sx.bio.pubmed;
/**
@ -7,31 +8,32 @@ package eu.dnetlib.dhp.sx.bio.pubmed;
*/
public class PMAffiliation {
private String name;
private String name;
private PMIdentifier identifier;
private PMIdentifier identifier;
public PMAffiliation() {
public PMAffiliation() {
}
public PMAffiliation(String name, PMIdentifier identifier) {
this.name = name;
this.identifier = identifier;
}
}
public String getName() {
return name;
}
public PMAffiliation(String name, PMIdentifier identifier) {
this.name = name;
this.identifier = identifier;
}
public void setName(String name) {
this.name = name;
}
public String getName() {
return name;
}
public PMIdentifier getIdentifier() {
return identifier;
}
public void setName(String name) {
this.name = name;
}
public void setIdentifier(PMIdentifier identifier) {
this.identifier = identifier;
}
public PMIdentifier getIdentifier() {
return identifier;
}
public void setIdentifier(PMIdentifier identifier) {
this.identifier = identifier;
}
}

View File

@ -97,5 +97,4 @@ public class PMAuthor implements Serializable {
this.affiliation = affiliation;
}
}

View File

@ -1,53 +1,53 @@
package eu.dnetlib.dhp.sx.bio.pubmed;
public class PMIdentifier {
private String pid;
private String type;
private String pid;
private String type;
public PMIdentifier(String pid, String type) {
this.pid = cleanPid(pid);
this.type = type;
}
public PMIdentifier(String pid, String type) {
this.pid = cleanPid(pid);
this.type = type;
}
public PMIdentifier() {
public PMIdentifier() {
}
}
private String cleanPid(String pid) {
private String cleanPid(String pid) {
if (pid == null) {
return null;
}
if (pid == null) {
return null;
}
// clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705
if (pid.matches("[0-9]{15}[0-9X]")) {
return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4");
}
// clean ORCID ID in the form 0000000163025705 to 0000-0001-6302-5705
if (pid.matches("[0-9]{15}[0-9X]")) {
return pid.replaceAll("(.{4})(.{4})(.{4})(.{4})", "$1-$2-$3-$4");
}
// clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543
if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) {
return pid.replaceAll("http://orcid.org/", "");
}
return pid;
}
// clean ORCID in the form http://orcid.org/0000-0001-8567-3543 to 0000-0001-8567-3543
if (pid.matches("http://orcid.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{4}")) {
return pid.replaceAll("http://orcid.org/", "");
}
return pid;
}
public String getPid() {
return pid;
}
public String getPid() {
return pid;
}
public PMIdentifier setPid(String pid) {
this.pid = cleanPid(pid);
return this;
}
public PMIdentifier setPid(String pid) {
this.pid = cleanPid(pid);
return this;
}
public String getType() {
return type;
}
public String getType() {
return type;
}
public PMIdentifier setType(String type) {
this.type = type;
return this;
}
public PMIdentifier setType(String type) {
this.type = type;
return this;
}
}

View File

@ -673,7 +673,6 @@ case object Crossref2Oaf {
val doi = input.getString(0)
val rorId = input.getString(1)
val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true)
val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId)

View File

@ -82,21 +82,22 @@ class PMParser2 {
a.setLastName((author \ "LastName").text)
a.setForeName((author \ "ForeName").text)
val id = (author \ "Identifier").text
val idType =(author \ "Identifier" \ "@Source").text
val idType = (author \ "Identifier" \ "@Source").text
if(id != null && id.nonEmpty && idType != null && idType.nonEmpty) {
if (id != null && id.nonEmpty && idType != null && idType.nonEmpty) {
a.setIdentifier(new PMIdentifier(id, idType))
}
val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text
val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text
val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text
val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text
if(affiliation != null && affiliation.nonEmpty) {
if (affiliation != null && affiliation.nonEmpty) {
val aff = new PMAffiliation()
aff.setName(affiliation)
if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) {
if (
affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty
) {
aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType))
}
a.setAffiliation(aff)

View File

@ -294,11 +294,23 @@ object PubMedToOaf {
author.setName(a.getForeName)
author.setSurname(a.getLastName)
author.setFullname(a.getFullName)
if(a.getIdentifier != null) {
author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid,
OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava)
if (a.getIdentifier != null) {
author.setPid(
List(
OafMapperUtils.structuredProperty(
a.getIdentifier.getPid,
OafMapperUtils.qualifier(
a.getIdentifier.getType,
a.getIdentifier.getType,
ModelConstants.DNET_PID_TYPES,
ModelConstants.DNET_PID_TYPES
),
dataInfo
)
).asJava
)
}
if (a.getAffiliation!= null)
if (a.getAffiliation != null)
author.setRawAffiliationString(List(a.getAffiliation.getName).asJava)
author.setRank(index + 1)
author

View File

@ -63,7 +63,6 @@ class BioScholixTest extends AbstractVocabularyTest {
"0000000333457333",
"0000000335964515",
"0000000302921949",
"http://orcid.org/0000-0001-8567-3543",
"http://orcid.org/0000-0001-7868-8528",
"0000-0001-9189-1440",