Modified last intersection since we lost many titles.

this is my last resource, after that, I've to  change my job
This commit is contained in:
Sandro La Bruzzo 2022-04-21 11:06:08 +02:00
parent d5b29d96a7
commit d580e15442
1 changed files with 34 additions and 3 deletions

View File

@ -60,7 +60,7 @@ object SparkGenerateDoiBoost {
val openaireOrganizationPath = parser.get("openaireOrganizationPath") val openaireOrganizationPath = parser.get("openaireOrganizationPath")
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable { val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
override def zero: Publication = new Publication override def zero: Publication = null
override def reduce(b: Publication, a: (String, Publication)): Publication = { override def reduce(b: Publication, a: (String, Publication)): Publication = {
@ -177,12 +177,43 @@ object SparkGenerateDoiBoost {
.map(DoiBoostMappingUtil.fixPublication) .map(DoiBoostMappingUtil.fixPublication)
.map(p => (p.getId, p)) .map(p => (p.getId, p))
.groupByKey(_._1) .groupByKey(_._1)
.agg(crossrefAggregator.toColumn) .reduceGroups((left, right) =>
.map(p => p._2) {
//Check left is not null
if (left != null && left._1 != null)
{
//If right is null then return left
if (right == null || right._2 == null)
left
else {
// Here Left and Right are not null
// So we have to merge
val b1 = left._2
val b2 = right._2
b1.mergeFrom(b2)
b1.mergeOAFDataInfo(b2)
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
b1.setAuthor(authors)
if (b2.getId != null && b2.getId.nonEmpty)
b1.setId(b2.getId)
//Return publication Merged
(b1.getId, b1)
}
}
else {
// Left is Null so we return right
right
}
}
)
.filter(s => s!= null && s._2!=null)
.map(s => s._2)
.write .write
.mode(SaveMode.Overwrite) .mode(SaveMode.Overwrite)
.save(s"$workingDirPath/doiBoostPublicationFiltered") .save(s"$workingDirPath/doiBoostPublicationFiltered")
val affiliationPath = parser.get("affiliationPath") val affiliationPath = parser.get("affiliationPath")
val paperAffiliationPath = parser.get("paperAffiliationPath") val paperAffiliationPath = parser.get("paperAffiliationPath")