forked from antonis.lempesis/dnet-hadoop
add author sequence number
This commit is contained in:
parent
a8e5d0ea0d
commit
4bb3bcafa5
|
@ -12,6 +12,7 @@ public class Author implements Serializable {
|
|||
|
||||
private String surname;
|
||||
|
||||
// Position of the author in the author list; numbering starts at 1
|
||||
private Integer rank;
|
||||
|
||||
private List<StructuredProperty> pid;
|
||||
|
|
|
@ -21,7 +21,7 @@ case class CrossrefDT(doi: String, json:String, timestamp: Long) {}
|
|||
|
||||
case class mappingAffiliation(name: String) {}
|
||||
|
||||
case class mappingAuthor(given: Option[String], family: String, ORCID: Option[String], affiliation: Option[mappingAffiliation]) {}
|
||||
// One entry of the "author" array of a Crossref record, as extracted from the record JSON.
// `sequence` holds the optional Crossref "sequence" value (e.g. "first" or "additional"),
// which the mapping uses to put the first author at the head of the author list.
case class mappingAuthor(given: Option[String], family: String, sequence:Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation]) {}
|
||||
|
||||
case class mappingFunder(name: String, DOI: Option[String], award: Option[List[String]]) {}
|
||||
|
||||
|
@ -162,7 +162,12 @@ case object Crossref2Oaf {
|
|||
|
||||
//Mapping Author
|
||||
val authorList: List[mappingAuthor] = (json \ "author").extractOrElse[List[mappingAuthor]](List())
|
||||
result.setAuthor(authorList.map(a => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull)).asJava)
|
||||
|
||||
|
||||
|
||||
// Move the author(s) flagged with sequence "first" (case-insensitive) to the front.
// The Boolean sort key is false for "first" authors and true for everyone else (false < true);
// because the sort is stable, all other authors keep their original relative order.
// A key-based sort is used instead of a hand-written sortWith predicate, which must be a strict
// ordering (it may not report both a < b and b < a when two authors are flagged "first").
val sorted_list = authorList.sortBy((a: mappingAuthor) => !a.sequence.exists(_.equalsIgnoreCase("first")))
|
||||
|
||||
result.setAuthor(sorted_list.zipWithIndex.map{case (a, index) => generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index)}.asJava)
|
||||
|
||||
// Mapping instance
|
||||
val instance = new Instance()
|
||||
|
@ -205,11 +210,12 @@ case object Crossref2Oaf {
|
|||
}
|
||||
|
||||
|
||||
def generateAuhtor(given: String, family: String, orcid: String): Author = {
|
||||
def generateAuhtor(given: String, family: String, orcid: String, index:Int): Author = {
|
||||
val a = new Author
|
||||
a.setName(given)
|
||||
a.setSurname(family)
|
||||
a.setFullname(s"$given $family")
|
||||
a.setRank(index+1)
|
||||
if (StringUtils.isNotBlank(orcid))
|
||||
a.setPid(List(createSP(orcid, ORCID_PENDING, PID_TYPES, generateDataInfo())).asJava)
|
||||
|
||||
|
|
|
@ -32,11 +32,11 @@ case class MagAffiliation(AffiliationId: Long, Rank: Int, NormalizedName: String
|
|||
case class MagPaperAuthorAffiliation(PaperId: Long, AuthorId: Long, AffiliationId: Option[Long], AuthorSequenceNumber: Int, OriginalAuthor: String, OriginalAffiliation: String) {}
|
||||
|
||||
|
||||
case class MagAuthorAffiliation(author: MagAuthor, affiliation:String)
|
||||
// A MAG author together with an affiliation string and the author's sequence number for a paper.
// `sequenceNumber` is what the OAF conversion stores as the author rank.
case class MagAuthorAffiliation(author: MagAuthor, affiliation:String, sequenceNumber:Int)
|
||||
|
||||
case class MagPaperWithAuthorList(PaperId: Long, authors: List[MagAuthorAffiliation]) {}
|
||||
|
||||
case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String) {}
|
||||
// Flattened (paper, author, affiliation) row produced while joining the MAG tables.
// `affiliation` may be null when no affiliation row is joined; `sequenceNumber` is copied from
// MagPaperAuthorAffiliation.AuthorSequenceNumber.
case class MagPaperAuthorDenormalized(PaperId: Long, author: MagAuthor, affiliation:String, sequenceNumber:Int) {}
|
||||
|
||||
case class MagPaperUrl(PaperId: Long, SourceType: Option[Int], SourceUrl: Option[String], LanguageCode: Option[String]) {}
|
||||
|
||||
|
@ -209,9 +209,9 @@ case object ConversionUtil {
|
|||
val authorsOAF = authors.authors.map { f: MagAuthorAffiliation =>
|
||||
|
||||
val a: eu.dnetlib.dhp.schema.oaf.Author = new eu.dnetlib.dhp.schema.oaf.Author
|
||||
|
||||
a.setFullname(f.author.DisplayName.get)
|
||||
|
||||
a.setRank(f.sequenceNumber)
|
||||
if (f.author.DisplayName.isDefined)
|
||||
a.setFullname(f.author.DisplayName.get)
|
||||
if(f.affiliation!= null)
|
||||
a.setAffiliation(List(asField(f.affiliation)).asJava)
|
||||
a.setPid(List(createSP(s"https://academic.microsoft.com/#/detail/${f.author.AuthorId}", "URL", PID_TYPES)).asJava)
|
||||
|
|
|
@ -58,13 +58,13 @@ object SparkProcessMAG {
|
|||
val paperAuthorAffiliation = spark.read.load(s"$sourcePath/PaperAuthorAffiliations").as[MagPaperAuthorAffiliation]
|
||||
|
||||
paperAuthorAffiliation.joinWith(authors, paperAuthorAffiliation("AuthorId").equalTo(authors("AuthorId")))
|
||||
.map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null)) }
|
||||
.map { case (a: MagPaperAuthorAffiliation, b: MagAuthor) => (a.AffiliationId, MagPaperAuthorDenormalized(a.PaperId, b, null, a.AuthorSequenceNumber)) }
|
||||
.joinWith(affiliation, affiliation("AffiliationId").equalTo(col("_1")), "left")
|
||||
.map(s => {
|
||||
val mpa = s._1._2
|
||||
val af = s._2
|
||||
if (af != null) {
|
||||
MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName)
|
||||
MagPaperAuthorDenormalized(mpa.PaperId, mpa.author, af.DisplayName, mpa.sequenceNumber)
|
||||
} else
|
||||
mpa
|
||||
})
// Collect one struct per author for each paper. sequenceNumber must be part of the struct so that
// every element matches MagAuthorAffiliation(author, affiliation, sequenceNumber); without it the
// author rank is lost and the column no longer matches the MagPaperWithAuthorList.authors element type.
.groupBy("PaperId").agg(collect_list(struct($"author", $"affiliation", $"sequenceNumber")).as("authors"))
|
||||
|
|
|
@ -12,14 +12,6 @@
|
|||
"abstract": "<jats:p>A qualitative spot-test and tandem quantitative analysis of dipyrone in the bulk drugand in pharmaceutical preparations is proposed. The formation of a reddish-violet\u00a0 color indicates a positive result. In sequence a quantitative procedure can be performed in the same flask. The quantitative results obtained were statistically compared with those obtained with the method indicated by the Brazilian\u00a0 Pharmacopoeia, using the Student\u2019s t and the F tests. Considering the concentration in a 100 \u03bcL aliquot, the qualitative visual limit of detection is about 5\u00d710-6 g; instrumental LOD \u2245 1.4\u00d710-4 mol L-1 ; LOQ \u2245 4.5\u00d710-4 mol L-1.</jats:p>",
|
||||
"prefix": "10.26850",
|
||||
"author": [
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Matthieu",
|
||||
"family": "Tubino",
|
||||
"sequence": "first",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0002-1987-3907"
|
||||
},
|
||||
{
|
||||
"affiliation": [],
|
||||
"given": "A. C.",
|
||||
|
@ -49,6 +41,14 @@
|
|||
"sequence": "additional",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0001-5564-1639"
|
||||
},
|
||||
{
|
||||
"authenticated-orcid": false,
|
||||
"given": "Matthieu",
|
||||
"family": "Tubino",
|
||||
"sequence": "first",
|
||||
"affiliation": [],
|
||||
"ORCID": "http://orcid.org/0000-0002-1987-3907"
|
||||
}
|
||||
],
|
||||
"reference-count": 0,
|
||||
|
|
Loading…
Reference in New Issue