ORCID Enrichment and Download #364
|
@ -4,38 +4,13 @@ package eu.dnetlib.dhp.oa.merge;
|
|||
import java.text.Normalizer;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
|
||||
import com.wcohen.ss.JaroWinkler;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Author;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
import eu.dnetlib.pace.model.Person;
|
||||
import scala.Tuple2;
|
||||
|
||||
class SimilarityCellInfo implements Comparable<SimilarityCellInfo> {
|
||||
|
||||
public int authorPosition = 0;
|
||||
public int orcidPosition = 0;
|
||||
|
||||
public double maxColumnSimilarity = 0.0;
|
||||
|
||||
public SimilarityCellInfo() {
|
||||
}
|
||||
|
||||
public void setValues(final int authPos, final int orcidPos, final double similarity) {
|
||||
this.authorPosition = authPos;
|
||||
this.orcidPosition = orcidPos;
|
||||
this.maxColumnSimilarity = similarity;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(@NotNull SimilarityCellInfo o) {
|
||||
return Double.compare(maxColumnSimilarity, o.maxColumnSimilarity);
|
||||
}
|
||||
}
|
||||
|
||||
public class AuthorMerger {
|
||||
|
||||
|
@ -183,6 +158,7 @@ public class AuthorMerger {
|
|||
/**
|
||||
* This method tries to figure out whether two authors are the same in the context
|
||||
* of ORCID enrichment
|
||||
*
|
||||
* @param left Author in the OAF entity
|
||||
* @param right Author ORCID
|
||||
* @return based on a heuristic on the names of the authors if they are the same.
|
||||
|
@ -238,6 +214,7 @@ public class AuthorMerger {
|
|||
|
||||
/**
|
||||
* Method to enrich ORCID information in one list of authors based on another list
|
||||
*
|
||||
* @param baseAuthor the Author List in the OAF Entity
|
||||
* @param orcidAuthor The list of ORCID Author intersected
|
||||
* @return The Author List of the OAF Entity enriched with the orcid Author
|
||||
|
|
|
@ -92,7 +92,6 @@ object SparkGenerateDoiBoost {
|
|||
.mode(SaveMode.Overwrite)
|
||||
.save(s"$workingDirPath/firstJoin")
|
||||
|
||||
|
||||
logger.info("Phase 2) Join Result with MAG")
|
||||
val sj: Dataset[(String, Publication)] =
|
||||
spark.read.load(s"$workingDirPath/firstJoin").as[Publication].map(p => (p.getId, p))
|
||||
|
|
|
@ -6,13 +6,10 @@ import org.junit.jupiter.api.Test
|
|||
import org.slf4j.{Logger, LoggerFactory}
|
||||
import org.apache.spark.sql.functions._
|
||||
|
||||
|
||||
class EnrichOrcidTest {
|
||||
|
||||
val log: Logger = LoggerFactory.getLogger(getClass)
|
||||
|
||||
|
||||
|
||||
def test() = {
|
||||
val spark = SparkSession.builder().master("local[*]").getOrCreate()
|
||||
// spark.sparkContext.setLogLevel("ERROR")
|
||||
|
@ -63,7 +60,6 @@ class EnrichOrcidTest {
|
|||
// }).filter(author => author != null)
|
||||
// })
|
||||
|
||||
|
||||
Encoders
|
||||
import spark.implicits._
|
||||
|
||||
|
@ -76,10 +72,6 @@ class EnrichOrcidTest {
|
|||
//
|
||||
// .show()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue