forked from D-Net/dnet-hadoop
Modified last intersection since we lost many titles.
this is my last resource, after that, I've to change my job
This commit is contained in:
parent
d5b29d96a7
commit
d580e15442
|
@ -60,7 +60,7 @@ object SparkGenerateDoiBoost {
|
||||||
val openaireOrganizationPath = parser.get("openaireOrganizationPath")
|
val openaireOrganizationPath = parser.get("openaireOrganizationPath")
|
||||||
|
|
||||||
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
|
val crossrefAggregator = new Aggregator[(String, Publication), Publication, Publication] with Serializable {
|
||||||
override def zero: Publication = new Publication
|
override def zero: Publication = null
|
||||||
|
|
||||||
override def reduce(b: Publication, a: (String, Publication)): Publication = {
|
override def reduce(b: Publication, a: (String, Publication)): Publication = {
|
||||||
|
|
||||||
|
@ -177,12 +177,43 @@ object SparkGenerateDoiBoost {
|
||||||
.map(DoiBoostMappingUtil.fixPublication)
|
.map(DoiBoostMappingUtil.fixPublication)
|
||||||
.map(p => (p.getId, p))
|
.map(p => (p.getId, p))
|
||||||
.groupByKey(_._1)
|
.groupByKey(_._1)
|
||||||
.agg(crossrefAggregator.toColumn)
|
.reduceGroups((left, right) =>
|
||||||
.map(p => p._2)
|
{
|
||||||
|
//Check left is not null
|
||||||
|
if (left != null && left._1 != null)
|
||||||
|
{
|
||||||
|
//If right is null then return left
|
||||||
|
if (right == null || right._2 == null)
|
||||||
|
left
|
||||||
|
else {
|
||||||
|
// Here Left and Right are not null
|
||||||
|
// So we have to merge
|
||||||
|
val b1 = left._2
|
||||||
|
val b2 = right._2
|
||||||
|
b1.mergeFrom(b2)
|
||||||
|
b1.mergeOAFDataInfo(b2)
|
||||||
|
val authors = AuthorMerger.mergeAuthor(b1.getAuthor, b2.getAuthor)
|
||||||
|
b1.setAuthor(authors)
|
||||||
|
if (b2.getId != null && b2.getId.nonEmpty)
|
||||||
|
b1.setId(b2.getId)
|
||||||
|
//Return publication Merged
|
||||||
|
(b1.getId, b1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Left is Null so we return right
|
||||||
|
right
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
)
|
||||||
|
.filter(s => s!= null && s._2!=null)
|
||||||
|
.map(s => s._2)
|
||||||
.write
|
.write
|
||||||
.mode(SaveMode.Overwrite)
|
.mode(SaveMode.Overwrite)
|
||||||
.save(s"$workingDirPath/doiBoostPublicationFiltered")
|
.save(s"$workingDirPath/doiBoostPublicationFiltered")
|
||||||
|
|
||||||
|
|
||||||
val affiliationPath = parser.get("affiliationPath")
|
val affiliationPath = parser.get("affiliationPath")
|
||||||
val paperAffiliationPath = parser.get("paperAffiliationPath")
|
val paperAffiliationPath = parser.get("paperAffiliationPath")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue