diff --git a/dhp-workflows/dhp-aggregation/pom.xml b/dhp-workflows/dhp-aggregation/pom.xml
index 98e22d8a3..c89cc9d1d 100644
--- a/dhp-workflows/dhp-aggregation/pom.xml
+++ b/dhp-workflows/dhp-aggregation/pom.xml
@@ -29,6 +29,13 @@
testCompile
+
+ scala-doc
+ process-resources
+
+ doc
+
+
${scala.version}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java
index 881528425..af0d5169d 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMArticle.java
@@ -5,94 +5,249 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
+/**
+ * This class represents an instance of a Pubmed Article extracted from the native XML
+ *
+ * @author Sandro La Bruzzo
+ */
+
public class PMArticle implements Serializable {
+ /**
+ * the Pubmed Identifier
+ */
private String pmid;
+ /**
+ * the DOI
+ */
private String doi;
+ /**
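+ * the Pubmed Date extracted from {@code <PubmedPubDate>}, which specifies a date significant to either the article's history or the citation's processing.
+ * All dates will have {@code <Year>}, {@code <Month>}, and {@code <Day>} elements. Some may also have {@code <Hour>}, {@code <Minute>}, and {@code <Second>} element(s).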
+ */
private String date;
+ /**
+ * This is an 'envelop' element that contains various elements describing the journal cited; i.e., ISSN, Volume, Issue, and PubDate and author name(s), however, it does not contain data itself.
+ */
private PMJournal journal;
+ /**
+ * The full journal title (taken from NLM cataloging data following NLM rules for how to compile a serial name) is exported in this element. Some characters that are not part of the NLM MEDLINE/PubMed Character Set reside in a relatively small number of full journal titles. The NLM journal title abbreviation is exported in the {@code <MedlineTA>} element.
+ */
private String title;
+ /**
+ * English-language abstracts are taken directly from the published article.
+ * If the article does not have a published abstract, the National Library of Medicine does not create one,
+ * thus the record lacks the {@code <Abstract>} and {@code <AbstractText>} elements. However, in the absence of a formally
+ * labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used.
+ */
private String description;
+ /**
+ * the language in which an article was published is recorded in {@code <Language>}.
+ * All entries are three letter abbreviations stored in lower case, such as eng, fre, ger, jpn, etc. When a single
+ * record contains more than one language value the XML export program extracts the languages in alphabetic order by the 3-letter language value.
+ * Some records provided by collaborating data producers may contain the value und to identify articles whose language is undetermined.
+ */
private String language;
+
+ /**
+ * NLM controlled vocabulary, Medical Subject Headings (MeSH®), is used to characterize the content of the articles represented by MEDLINE citations.
+ */
private final List subjects = new ArrayList<>();
+ /**
+ * This element is used to identify the type of article indexed for MEDLINE;
+ * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of
+ * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural).
+ */
private final List publicationTypes = new ArrayList<>();
+ /**
+ * Personal and collective (corporate) author names published with the article are found in {@code <AuthorList>}.
+ */
private List authors = new ArrayList<>();
- public List getPublicationTypes() {
- return publicationTypes;
- }
-
+ /**
+ * {@code <GrantID>} contains the research grant or contract number (or both) that designates financial support by any agency of the United States Public Health Service
+ * or any institute of the National Institutes of Health. Additionally, beginning in late 2005, grant numbers are included for many other US and non-US funding agencies and organizations.
+ */
private final List grants = new ArrayList<>();
- public List getGrants() {
- return grants;
- }
-
+ /**
+ * get the DOI
+ * @return a DOI
+ */
public String getDoi() {
return doi;
}
+ /**
+ * Set the DOI
+ * @param doi a DOI
+ */
public void setDoi(String doi) {
this.doi = doi;
}
+ /**
+ * get the Pubmed Identifier
+ * @return the PMID
+ */
public String getPmid() {
return pmid;
}
+ /**
+ * set the Pubmed Identifier
+ * @param pmid the Pubmed Identifier
+ */
public void setPmid(String pmid) {
this.pmid = pmid;
}
+ /**
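+ * the Pubmed Date extracted from {@code <PubmedPubDate>}, which specifies a date significant to either the article's history or the citation's processing.
+ * All dates will have {@code <Year>}, {@code <Month>}, and {@code <Day>} elements. Some may also have {@code <Hour>}, {@code <Minute>}, and {@code <Second>} element(s).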
+ *
+ * @return the Pubmed Date
+ */
public String getDate() {
return date;
}
+ /**
+ * Set the pubmed Date
+ * @param date the pubmed Date
+ */
public void setDate(String date) {
this.date = date;
}
+ /**
+ * The journal is an 'envelop' element that contains various elements describing the journal cited;
+ * i.e., ISSN, Volume, Issue, and PubDate and author name(s),
+ * however, it does not contain data itself.
+ *
+ * @return the extracted pubmed Journal
+ */
public PMJournal getJournal() {
return journal;
}
+ /**
+ * Set the mapped pubmed Journal
+ * @param journal the mapped pubmed Journal
+ */
public void setJournal(PMJournal journal) {
this.journal = journal;
}
+ /**
+ * Get the pubmed title.
+ * Note that the abstract of the article is not returned by this method:
+ * it is exposed by {@link #getDescription()}.
+ *
+ *
+ * @return the extracted pubmed Title
+ */
public String getTitle() {
return title;
}
+ /**
+ * set the pubmed title
+ * @param title the pubmed title
+ */
public void setTitle(String title) {
this.title = title;
}
+ /**
+ * English-language abstracts are taken directly from the published article.
+ * If the article does not have a published abstract, the National Library of Medicine does not create one,
+ * thus the record lacks the {@code <Abstract>} and {@code <AbstractText>} elements. However, in the absence of a formally
+ * labeled abstract in the published article, text from a substantive "summary", "summary and conclusions" or "conclusions and summary" may be used.
+ *
+ * @return the Mapped Pubmed Article Abstracts
+ */
public String getDescription() {
return description;
}
+ /**
+ * Set the Mapped Pubmed Article Abstracts
+ * @param description the Mapped Pubmed Article Abstracts
+ */
public void setDescription(String description) {
this.description = description;
}
+ /**
+ * Personal and collective (corporate) author names published with the article are found in {@code <AuthorList>}.
+ *
+ * @return get the Mapped Authors lists
+ */
public List getAuthors() {
return authors;
}
+ /**
+ * Set the Mapped Authors lists
+ * @param authors the Mapped Authors lists
+ */
public void setAuthors(List authors) {
this.authors = authors;
}
+ /**
+ * NLM controlled vocabulary, Medical Subject Headings (MeSH®),
+ * is used to characterize the content of the articles
+ * represented by MEDLINE citations.
+ *
+ * @return the mapped Subjects
+ */
public List getSubjects() {
return subjects;
}
+ /**
+ *
+ * the language in which an article was published is recorded in {@code <Language>}.
+ * All entries are three letter abbreviations stored in lower case, such as eng, fre, ger, jpn, etc. When a single
+ * record contains more than one language value the XML export program extracts the languages in alphabetic order by the 3-letter language value.
+ * Some records provided by collaborating data producers may contain the value und to identify articles whose language is undetermined.
+ *
+ * @return The mapped Language
+ */
public String getLanguage() {
return language;
}
+ /**
+ *
+ * Set The mapped Language
+ *
+ * @param language the mapped Language
+ */
public void setLanguage(String language) {
this.language = language;
}
+
+ /**
+ * This element is used to identify the type of article indexed for MEDLINE;
+ * it characterizes the nature of the information or the manner in which it is conveyed as well as the type of
+ * research support received (e.g., Review, Letter, Retracted Publication, Clinical Conference, Research Support, N.I.H., Extramural).
+ *
+ * @return the mapped Publication Type
+ */
+ public List getPublicationTypes() {
+ return publicationTypes;
+ }
+
+ /**
+ * {@code <GrantID>} contains the research grant or contract number (or both) that designates financial support by any agency of the United States Public Health Service
+ * or any institute of the National Institutes of Health. Additionally, beginning in late 2005, grant numbers are included for many other US and non-US funding agencies and organizations.
+ * @return the mapped grants
+ */
+
+ public List getGrants() {
+ return grants;
+ }
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java
index cef92d003..68ef6459e 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMAuthor.java
@@ -3,27 +3,57 @@ package eu.dnetlib.dhp.sx.bio.pubmed;
import java.io.Serializable;
+/**
+ * The type Pubmed author.
+ *
+ * @author Sandro La Bruzzo
+ */
public class PMAuthor implements Serializable {
private String lastName;
private String foreName;
+ /**
+ * Gets last name.
+ *
+ * @return the last name
+ */
public String getLastName() {
return lastName;
}
+ /**
+ * Sets last name.
+ *
+ * @param lastName the last name
+ */
public void setLastName(String lastName) {
this.lastName = lastName;
}
+ /**
+ * Gets fore name.
+ *
+ * @return the fore name
+ */
public String getForeName() {
return foreName;
}
+ /**
+ * Sets fore name.
+ *
+ * @param foreName the fore name
+ */
public void setForeName(String foreName) {
this.foreName = foreName;
}
+ /**
+ * Gets full name.
+ *
+ * @return the full name
+ */
public String getFullName() {
return String
.format("%s, %s", this.foreName != null ? this.foreName : "", this.lastName != null ? this.lastName : "");
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java
index ce9420cc1..abb908483 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMGrant.java
@@ -1,41 +1,86 @@
package eu.dnetlib.dhp.sx.bio.pubmed;
+/**
+ * The type Pm grant.
+ *
+ * @author Sandro La Bruzzo
+ */
public class PMGrant {
private String grantID;
private String agency;
private String country;
+ /**
+ * Instantiates a new Pm grant.
+ */
public PMGrant() {
}
+ /**
+ * Instantiates a new Pm grant.
+ *
+ * @param grantID the grant id
+ * @param agency the agency
+ * @param country the country
+ */
public PMGrant(String grantID, String agency, String country) {
this.grantID = grantID;
this.agency = agency;
this.country = country;
}
+ /**
+ * Gets grant id.
+ *
+ * @return the grant id
+ */
public String getGrantID() {
return grantID;
}
+ /**
+ * Sets grant id.
+ *
+ * @param grantID the grant id
+ */
public void setGrantID(String grantID) {
this.grantID = grantID;
}
+ /**
+ * Gets agency.
+ *
+ * @return the agency
+ */
public String getAgency() {
return agency;
}
+ /**
+ * Sets agency.
+ *
+ * @param agency the agency
+ */
public void setAgency(String agency) {
this.agency = agency;
}
+ /**
+ * Gets country.
+ *
+ * @return the country
+ */
public String getCountry() {
return country;
}
+ /**
+ * Sets country.
+ *
+ * @param country the country
+ */
public void setCountry(String country) {
this.country = country;
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java
index 863a23bd5..731648839 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMJournal.java
@@ -3,6 +3,11 @@ package eu.dnetlib.dhp.sx.bio.pubmed;
import java.io.Serializable;
+/**
+ * The type Pm journal.
+ *
+ * @author Sandro La Bruzzo
+ */
public class PMJournal implements Serializable {
private String issn;
@@ -11,42 +16,92 @@ public class PMJournal implements Serializable {
private String date;
private String title;
+ /**
+ * Gets issn.
+ *
+ * @return the issn
+ */
public String getIssn() {
return issn;
}
+ /**
+ * Sets issn.
+ *
+ * @param issn the issn
+ */
public void setIssn(String issn) {
this.issn = issn;
}
+ /**
+ * Gets volume.
+ *
+ * @return the volume
+ */
public String getVolume() {
return volume;
}
+ /**
+ * Sets volume.
+ *
+ * @param volume the volume
+ */
public void setVolume(String volume) {
this.volume = volume;
}
+ /**
+ * Gets issue.
+ *
+ * @return the issue
+ */
public String getIssue() {
return issue;
}
+ /**
+ * Sets issue.
+ *
+ * @param issue the issue
+ */
public void setIssue(String issue) {
this.issue = issue;
}
+ /**
+ * Gets date.
+ *
+ * @return the date
+ */
public String getDate() {
return date;
}
+ /**
+ * Sets date.
+ *
+ * @param date the date
+ */
public void setDate(String date) {
this.date = date;
}
+ /**
+ * Gets title.
+ *
+ * @return the title
+ */
public String getTitle() {
return title;
}
+ /**
+ * Sets title.
+ *
+ * @param title the title
+ */
public void setTitle(String title) {
this.title = title;
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
index 80cb0667c..c6d5fdf74 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala
@@ -2,6 +2,12 @@ package eu.dnetlib.dhp.sx.bio.pubmed
import scala.xml.MetaData
import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader}
+
+
+/**
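+ * Iterator that produces the PMArticle instances read from a stream of XML events
+ * @param xml the XMLEventReader supplying the XML events to be parsed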
+ */
class PMParser(xml:XMLEventReader) extends Iterator[PMArticle] {
var currentArticle:PMArticle = generateNextArticle()
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java
index 862d39a94..e3829bb7b 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PMSubject.java
@@ -1,40 +1,83 @@
package eu.dnetlib.dhp.sx.bio.pubmed;
+/**
+ * The type Pubmed subject.
+ */
public class PMSubject {
private String value;
private String meshId;
private String registryNumber;
+ /**
+ * Instantiates a new Pm subject.
+ */
public PMSubject() {
}
+ /**
+ * Instantiates a new Pm subject.
+ *
+ * @param value the value
+ * @param meshId the mesh id
+ * @param registryNumber the registry number
+ */
public PMSubject(String value, String meshId, String registryNumber) {
this.value = value;
this.meshId = meshId;
this.registryNumber = registryNumber;
}
+ /**
+ * Gets value.
+ *
+ * @return the value
+ */
public String getValue() {
return value;
}
+ /**
+ * Sets value.
+ *
+ * @param value the value
+ */
public void setValue(String value) {
this.value = value;
}
+ /**
+ * Gets mesh id.
+ *
+ * @return the mesh id
+ */
public String getMeshId() {
return meshId;
}
+ /**
+ * Sets mesh id.
+ *
+ * @param meshId the mesh id
+ */
public void setMeshId(String meshId) {
this.meshId = meshId;
}
+ /**
+ * Gets registry number.
+ *
+ * @return the registry number
+ */
public String getRegistryNumber() {
return registryNumber;
}
+ /**
+ * Sets registry number.
+ *
+ * @param registryNumber the registry number
+ */
public void setRegistryNumber(String registryNumber) {
this.registryNumber = registryNumber;
}
diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
index 13f38408e..ecef32202 100644
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/sx/bio/pubmed/PubMedToOaf.scala
@@ -8,6 +8,9 @@ import scala.collection.JavaConverters._
import java.util.regex.Pattern
+/**
+ * Functions used to map a Pubmed article (PMArticle) into the corresponding OAF Result
+ */
object PubMedToOaf {
val SUBJ_CLASS = "keywords"
@@ -15,7 +18,17 @@ object PubMedToOaf {
"pmid" -> "https://pubmed.ncbi.nlm.nih.gov/",
"doi" -> "https://dx.doi.org/"
)
+ val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
+ val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central")
+
+
+ /**
+ * Clean the DOI by applying a regex in order to
+ * remove DOIs starting with a URL
+ * @param doi input DOI
+ * @return cleaned DOI
+ */
def cleanDoi(doi: String): String = {
val regex = "^10.\\d{4,9}\\/[\\[\\]\\-\\<\\>._;()\\/:A-Z0-9]+$"
@@ -30,6 +43,15 @@ object PubMedToOaf {
null
}
+ /**
+ *
+ * Create an instance of class extends Result
+ * starting from OAF instanceType value
+ *
+ * @param cobjQualifier OAF instance type
+ * @param vocabularies All dnet vocabularies
+ * @return the correct instance
+ */
def createResult(cobjQualifier: Qualifier, vocabularies: VocabularyGroup): Result = {
val result_typologies = getVocabularyTerm(ModelConstants.DNET_RESULT_TYPOLOGIES, vocabularies, cobjQualifier.getClassid)
result_typologies.getClassid match {
@@ -42,6 +64,12 @@ object PubMedToOaf {
}
}
+ /**
+ * Map the Pubmed journal info into the OAF Journal
+ *
+ * @param j the pubmedJournal
+ * @return the OAF Journal
+ */
def mapJournal(j: PMJournal): Journal = {
if (j == null)
return null
@@ -49,6 +77,7 @@ object PubMedToOaf {
journal.setDataInfo(dataInfo)
journal.setName(j.getTitle)
+ journal.setConferencedate(j.getDate)
journal.setVol(j.getVolume)
journal.setIssnPrinted(j.getIssn)
journal.setIss(j.getIssue)
@@ -57,25 +86,43 @@ object PubMedToOaf {
}
-
+ /**
+ *
+ * Find the vocabulary term, looking first among the synonyms and then among the terms of the vocabulary
+ *
+ * @param vocabularyName the input vocabulary name
+ * @param vocabularies all the vocabularies
+ * @param term the term to find
+ *
+ * @return the cleaned term value
+ */
def getVocabularyTerm(vocabularyName: String, vocabularies: VocabularyGroup, term: String): Qualifier = {
val a = vocabularies.getSynonymAsQualifier(vocabularyName, term)
val b = vocabularies.getTermAsQualifier(vocabularyName, term)
if (a == null) b else a
}
- val dataInfo: DataInfo = OafMapperUtils.dataInfo(false, null, false, false, ModelConstants.PROVENANCE_ACTION_SET_QUALIFIER, "0.9")
- val collectedFrom: KeyValue = OafMapperUtils.keyValue(ModelConstants.EUROPE_PUBMED_CENTRAL_ID, "Europe PubMed Central")
+ /**
+ * Map the Pubmed Article into the OAF instance
+ *
+ *
+ * @param article the pubmed articles
+ * @param vocabularies the vocabularies
+ * @return The OAF instance if the mapping did not fail
+ */
def convert(article: PMArticle, vocabularies: VocabularyGroup): Result = {
if (article.getPublicationTypes == null)
return null
- val i = new Instance
+
+
+ // MAP PMID into pid with classid = classname = pmid
val pidList: List[StructuredProperty] = List(OafMapperUtils.structuredProperty(article.getPmid, PidType.pmid.toString, PidType.pmid.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo))
if (pidList == null)
return null
+ // MAP //ArticleId[./@IdType="doi"] into alternateIdentifier with classid = classname = doi
var alternateIdentifier: StructuredProperty = null
if (article.getDoi != null) {
val normalizedPid = cleanDoi(article.getDoi)
@@ -83,43 +130,64 @@ object PubMedToOaf {
alternateIdentifier = OafMapperUtils.structuredProperty(normalizedPid, PidType.doi.toString, PidType.doi.toString, ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, dataInfo)
}
+ // INSTANCE MAPPING
+ //--------------------------------------------------------------------------------------
+
// If the article contains the typology Journal Article then we apply this type
//else We have to find a terms that match the vocabulary otherwise we discard it
val ja = article.getPublicationTypes.asScala.find(s => "Journal Article".equalsIgnoreCase(s.getValue))
+ val pubmedInstance = new Instance
if (ja.isDefined) {
val cojbCategory = getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, ja.get.getValue)
- i.setInstancetype(cojbCategory)
+ pubmedInstance.setInstancetype(cojbCategory)
} else {
val i_type = article.getPublicationTypes.asScala
.map(s => getVocabularyTerm(ModelConstants.DNET_PUBLICATION_RESOURCE, vocabularies, s.getValue))
.find(q => q != null)
if (i_type.isDefined)
- i.setInstancetype(i_type.get)
+ pubmedInstance.setInstancetype(i_type.get)
else
return null
}
- val result = createResult(i.getInstancetype, vocabularies)
+ val result = createResult(pubmedInstance.getInstancetype, vocabularies)
if (result == null)
return result
result.setDataInfo(dataInfo)
- i.setPid(pidList.asJava)
+ pubmedInstance.setPid(pidList.asJava)
if (alternateIdentifier != null)
- i.setAlternateIdentifier(List(alternateIdentifier).asJava)
- result.setInstance(List(i).asJava)
- i.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection.breakOut)
+ pubmedInstance.setAlternateIdentifier(List(alternateIdentifier).asJava)
+ result.setInstance(List(pubmedInstance).asJava)
+ pubmedInstance.getPid.asScala.filter(p => "pmid".equalsIgnoreCase(p.getQualifier.getClassid)).map(p => p.getValue)(collection.breakOut)
+ //CREATE URL From pmid
val urlLists: List[String] = pidList
.map(s => (urlMap.getOrElse(s.getQualifier.getClassid, ""), s.getValue))
.filter(t => t._1.nonEmpty)
.map(t => t._1 + t._2)
if (urlLists != null)
- i.setUrl(urlLists.asJava)
- i.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
- i.setCollectedfrom(collectedFrom)
+ pubmedInstance.setUrl(urlLists.asJava)
+
+ //ASSIGN DateofAcceptance
+ pubmedInstance.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
+ //ASSIGN COLLECTEDFROM
+ pubmedInstance.setCollectedfrom(collectedFrom)
result.setPid(pidList.asJava)
+
+ //END INSTANCE MAPPING
+ //--------------------------------------------------------------------------------------
+
+
+ // JOURNAL MAPPING
+ //--------------------------------------------------------------------------------------
if (article.getJournal != null && result.isInstanceOf[Publication])
result.asInstanceOf[Publication].setJournal(mapJournal(article.getJournal))
result.setCollectedfrom(List(collectedFrom).asJava)
+ //END JOURNAL MAPPING
+ //--------------------------------------------------------------------------------------
+
+
+ // RESULT MAPPING
+ //--------------------------------------------------------------------------------------
result.setDateofacceptance(OafMapperUtils.field(GraphCleaningFunctions.cleanDate(article.getDate), dataInfo))
if (article.getTitle == null || article.getTitle.isEmpty)
@@ -159,6 +227,9 @@ object PubMedToOaf {
result.setId(article.getPmid)
+
+ // END RESULT MAPPING
+ //--------------------------------------------------------------------------------------
val id = IdentifierFactory.createIdentifier(result)
if (article.getPmid.equalsIgnoreCase(id))
return null
diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/datacite.md b/dhp-workflows/dhp-aggregation/src/site/markdown/datacite.md
new file mode 100644
index 000000000..e69de29bb
diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/index.md b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md
new file mode 100644
index 000000000..c0c756082
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/site/markdown/index.md
@@ -0,0 +1,9 @@
+## DHP-Aggregation
+
+This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records.
+
+Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure
+the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping
+of each MDStore.
+
+It defines [mappings](mappings.md) for the transformation of different datasources (see the mapping section).
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/introduction.md b/dhp-workflows/dhp-aggregation/src/site/markdown/introduction.md
new file mode 100644
index 000000000..9da46a27e
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/site/markdown/introduction.md
@@ -0,0 +1,7 @@
+## DHP-Aggregation
+
+This module defines a set of oozie workflows for the **collection** and **transformation** of metadata records.
+
+Both workflows interact with the Metadata Store Manager (MdSM) to handle the logical transactions required to ensure
+the consistency of the read/write operations on the data as the MdSM in fact keeps track of the logical-physical mapping
+of each MDStore.
\ No newline at end of file
diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/mappings.md b/dhp-workflows/dhp-aggregation/src/site/markdown/mappings.md
new file mode 100644
index 000000000..576c4b6be
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/site/markdown/mappings.md
@@ -0,0 +1,18 @@
+DHP Aggregation
+===============
+
+DHP-Aggregations contains different mappings from original data format into OAF Data Format,
+which converge in the graph in different ways:
+
+- Via Action Manager
+- Direct in the MdStore on Hadoop
+
+Below is the list of the implemented mappings
+
+
+Mappings
+=======
+
+1. [PubMed](pubmed.md)
+2. [Datacite](datacite.md)
+
diff --git a/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md
new file mode 100644
index 000000000..f6327a51b
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/site/markdown/pubmed.md
@@ -0,0 +1,62 @@
+# Pubmed Mapping
+This section describes the mapping implemented for [MEDLINE/PubMed](https://pubmed.ncbi.nlm.nih.gov/).
+
+Collection
+---------
+The native data is collected from [ftp baseline](https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/) containing XML with
+the following [schema](https://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html)
+
+
+Parsing
+-------
+The class responsible for parsing is [PMParser](./scaladocs/#eu.dnetlib.dhp.sx.bio.pubmed.PMParser), which generates
+an intermediate mapping of PubMed Article defined [here](/apidocs/eu/dnetlib/dhp/sx/bio/pubmed/package-summary.html)
+
+
+Mapping
+-------
+
+The table below describes the mapping from the XML Native to the OAF mapping
+
+
+
+
+
+| Xpath Source | Oaf Field | Notes |
+| ----------- | ----------- | ----------- |
+| //PMID | pid | classid = classname = pmid
+| | **Instance Mapping** | |
+|//PublicationType | InstanceType | If the article contains the typology **Journal Article** then we apply this type, otherwise we look for a term that matches the vocabulary; if none is found the article is discarded
+|//PMID | instance/PID | Map the pmid also in the pid in the instance |
+| //ArticleId[./@IdType="doi"] | instance/alternateIdentifier |classid = classname = doi
+|//PMID | instance/URL | prepend to the PMId the base url https://pubmed.ncbi.nlm.nih.gov/
+| //PubmedPubDate | instance/Dateofacceptance | apply the function GraphCleaningFunctions.cleanDate before assign it
+| FOR ALL INSTANCE | CollectedFrom | datasourceName: *Europe PubMed Central* DatasourceId: *ModelConstants.EUROPE_PUBMED_CENTRAL_ID*
+| | **Journal Mapping** | |
+|//Journal/PubDate| Journal/Conferencedate | map the date of the Journal
+|//Journal/Title| Journal/Name | |
+|//Journal/Volume| Journal/Vol | |
+|//Journal/ISSN| Journal/issnPrinted | |
+|//Journal/Issue| Journal/Iss | |
+| | **Publication Mapping** | |
+| //PubmedPubDate | Dateofacceptance | apply the function GraphCleaningFunctions.cleanDate before assign it
+| //Title | title | with qualifier ModelConstants.MAIN_TITLE_QUALIFIER
+| //AbstractText | Description ||
+|//Language| Language| cleaning vocabulary -> dnet:languages
+|//DescriptorName| Subject | classId, className = keyword
+| | **Author Mapping** | |
+|//Author/LastName| author.Surname| |
+|//Author/ForeName| author.Forename| |
+|//Author/FullName| author.Forename| Concatenation of forename + lastName if they exist |
+|FOR ALL AUTHOR | author.rank| sequential number starting from 1|
+
+
+
+
+
+
+
+
+
+
+
diff --git a/dhp-workflows/dhp-aggregation/src/site/resources/images/openaire.png b/dhp-workflows/dhp-aggregation/src/site/resources/images/openaire.png
new file mode 100644
index 000000000..00d320c39
Binary files /dev/null and b/dhp-workflows/dhp-aggregation/src/site/resources/images/openaire.png differ
diff --git a/dhp-workflows/dhp-aggregation/src/site/site.xml b/dhp-workflows/dhp-aggregation/src/site/site.xml
new file mode 100644
index 000000000..da5da0f1e
--- /dev/null
+++ b/dhp-workflows/dhp-aggregation/src/site/site.xml
@@ -0,0 +1,32 @@
+
+
+
+ org.apache.maven.skins
+ maven-fluido-skin
+ 1.8
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 274192c67..71c55d1f0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -550,7 +550,7 @@
org.apache.maven.plugins
maven-site-plugin
- 3.7.1
+ 3.9.1