add author sequence number

This commit is contained in:
Sandro La Bruzzo 2021-03-11 11:32:32 +01:00
parent a8e5d0ea0d
commit 4bb3bcafa5
5 changed files with 25 additions and 18 deletions

View File

@ -12,6 +12,7 @@ public class Author implements Serializable {
private String surname;
// START WITH 1
private Integer rank;
private List<StructuredProperty> pid;

View File

@ -21,7 +21,7 @@ case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
case class mappingAffiliation(name: String) {}
case class mappingAuthor(given: Option[String], family: String, ORCID: Option[String], affiliation: Option[mappingAffiliation]) {}
case class mappingAuthor(given: Option[String], family: String, sequence:Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation]) {}
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
@ -162,7 +162,12 @@ case object Crossref2Oaf {
//Mapping Author
val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List())
result.setAuthor(authorList.map(a => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull)).asJava)
// Put authors flagged "first" ahead of all others while keeping the original relative order
// inside each group. sortBy is stable and the Boolean key (false < true) is a consistent
// ordering; the former sortWith predicate ignored its second argument, so two "first"
// authors each compared as "less than" the other, which breaks the comparator contract.
val sorted_list = authorList.sortBy(a => !a.sequence.exists(_.equalsIgnoreCase("first")))
result.setAuthor(sorted_list.zipWithIndex.map{case (a, index) => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)}.asJava)
// Mapping instance
val instance = new Instance()
@ -205,11 +210,12 @@ case object Crossref2Oaf {
}
def generateAuhtor(given: String, family: String, orcid: String): Author = {
def generateAuhtor(given: String, family: String, orcid: String, index:Int): Author = {
val a = new Author
a.setName(given)
a.setSurname(family)
// `given` is passed as a.given.orNull, so it may be null: join only the non-blank parts
// instead of interpolating, which would produce the literal text "null <family>".
a.setFullname(Seq(given, family).filter(part => StringUtils.isNotBlank(part)).mkString(" "))
a.setRank(index+1)
if (StringUtils.isNotBlank(orcid))
a.setPid(List(createSP(orcid, ORCID_PENDING, PID_TYPES, generateDataInfo())).asJava)

View File

@ -32,11 +32,11 @@ case class MagAffiliation(AffiliationId: Long, Rank: Int, NormalizedName: String
case class MagPaperAuthorAffiliation(PaperId: Long, AuthorId: Long, AffiliationId: Option[Long], AuthorSequenceNumber: Int, OriginalAuthor: String, OriginalAffiliation: String) {}
case class MagAuthorAffiliation(author: MagAuthor, affiliation:String)
case class MagAuthorAffiliation(author: MagAuthor, affiliation:String, sequenceNumber:Int)
case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation]) {}
case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String) {}
case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String, sequenceNumber:Int) {}
case class MagPaperUrl(PaperId: Long, SourceType: Option[Int], SourceUrl: Option[String], LanguageCode: Option[String]) {}
@ -209,9 +209,9 @@ case object ConversionUtil {
val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>
val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author
a.setFullname(f.author.DisplayName.get)
a.setRank(f.sequenceNumber)
if (f.author.DisplayName.isDefined)
a.setFullname(f.author.DisplayName.get)
if(f.affiliation!= null)
a.setAffiliation(List(asField(f.affiliation)).asJava)
a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)

View File

@ -58,13 +58,13 @@ object SparkProcessMAG {
val paperAuthorAffiliation = spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation]
paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId")))
.map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null)) }
.map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null, a.AuthorSequenceNumber)) }
.joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left")
.map(s => {
val mpa = s._1._2
val af = s._2
if (af != null) {
MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName)
MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName, mpa.sequenceNumber)
} else
mpa
// Keep sequenceNumber in the collected struct: MagAuthorAffiliation declares it and the
// author rank is read from it downstream, so it must survive the aggregation.
}).groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation", $"sequenceNumber")).as("authors"))

View File

@ -12,14 +12,6 @@
"abstract": "<jats:p>A qualitative spot-test and tandem quantitative analysis of dipyrone in the bulk drugand in pharmaceutical preparations is proposed. The formation of a reddish-violet\u00a0 color indicates a positive result. In sequence a quantitative procedure can be performed in the same flask. The quantitative results obtained were statistically compared with those obtained with the method indicated by the Brazilian\u00a0 Pharmacopoeia, using the Student\u2019s t and the F tests. Considering the concentration in a 100 \u03bcL aliquot, the qualitative visual limit of detection is about 5\u00d710-6 g; instrumental LOD \u2245 1.4\u00d710-4 mol L-1 ; LOQ \u2245 4.5\u00d710-4 mol L-1.</jats:p>",
"prefix": "10.26850",
"author": [
{
"authenticated-orcid": false,
"given": "Matthieu",
"family": "Tubino",
"sequence": "first",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0002-1987-3907"
},
{
"affiliation": [],
"given": "A. C.",
@ -49,6 +41,14 @@
"sequence": "additional",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0001-5564-1639"
},
{
"authenticated-orcid": false,
"given": "Matthieu",
"family": "Tubino",
"sequence": "first",
"affiliation": [],
"ORCID": "http://orcid.org/0000-0002-1987-3907"
}
],
"reference-count": 0,