forked from D-Net/dnet-hadoop
Removed unused function
Applied Giambattista's comment from the PR review
This commit is contained in:
parent cdfb7588dd
commit bf0fd27c36
@@ -132,17 +132,6 @@ public class AuthorMerger
             .trim();
     }

-    static int hammingDist(String str1, String str2) {
-        if (str1.length() != str2.length())
-            return Math.max(str1.length(), str2.length());
-        int i = 0, count = 0;
-        while (i < str1.length()) {
-            if (str1.charAt(i) != str2.charAt(i))
-                count++;
-            i++;
-        }
-        return count;
-    }
-
    private static String authorFieldToBeCompared(Author author) {
        if (StringUtils.isNotBlank(author.getSurname())) {
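The removed helper computed a Hamming distance between two strings, falling back to the longer of the two lengths when the inputs differ in length; it no longer had any caller in AuthorMerger. For reference only, a compact Scala sketch of the same logic (not part of the patch):

    // Reference sketch of the removed helper, not part of the patch:
    // equal-length inputs yield the number of differing positions,
    // unequal-length inputs yield the longer of the two lengths.
    def hammingDist(str1: String, str2: String): Int =
      if (str1.length != str2.length) math.max(str1.length, str2.length)
      else str1.zip(str2).count { case (a, b) => a != b }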
@@ -2,10 +2,12 @@ package eu.dnetlib.dhp.enrich.orcid

 import eu.dnetlib.dhp.application.AbstractScalaApplication
 import eu.dnetlib.dhp.oa.merge.AuthorMerger
+import eu.dnetlib.dhp.schema.common.ModelSupport
 import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql._
 import org.slf4j.{Logger, LoggerFactory}
+import scala.collection.JavaConverters._

 class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
   extends AbstractScalaApplication(propertyPath, args, log: Logger) {
@@ -21,6 +23,8 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
     val targetPath = parser.get("targetPath")
     log.info(s"targetPath is '$targetPath'")
     val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)
+    // ModelSupport.entityTypes.entrySet().asScala.filter(k => k.getKey.getClass isInstance(Result))
+
     enrichResult(
       spark,
       s"$graphPath/publication",
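The ModelSupport and JavaConverters imports added above back the commented-out line in this hunk, which sketches deriving the per-type enrichment from ModelSupport's entity-type map instead of hard-coding the enrichResult calls that follow. Purely as a hypothetical illustration of that direction (the filter condition and the assumption that the map's values are the entity classes are mine, not the patch's):

    // Hypothetical sketch, not part of the patch: view ModelSupport's Java map of
    // entity types through .asScala and keep only the Result subtypes, so the
    // enrichment targets could be derived instead of listed by hand.
    import scala.collection.JavaConverters._
    import eu.dnetlib.dhp.schema.common.ModelSupport
    import eu.dnetlib.dhp.schema.oaf.Result

    val resultTypes = ModelSupport.entityTypes
      .entrySet()
      .asScala
      .filter(e => classOf[Result].isAssignableFrom(e.getValue)) // assumes map values are entity classes
      .map(_.getKey)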
@@ -63,7 +67,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
       .schema(enc.schema)
       .json(graphPath)
       .select(col("id"), col("datainfo"), col("instance"))
-      .where("datainfo.deletedbyinference = false")
+      .where("datainfo.deletedbyinference != true")
       .drop("datainfo")
       .withColumn("instances", explode(col("instance")))
       .withColumn("pids", explode(col("instances.pid")))
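The deleted-by-inference filter changes from an equality with false to an inequality with true. For illustration only, the same predicate spelled through the Column API (df is just a stand-in for the frame being filtered); note that under Spark SQL's three-valued logic a NULL deletedbyinference makes either comparison NULL, so such rows are dropped by both spellings and keeping them would need an explicit null check:

    // Illustration only, not part of the patch: the patched predicate via the Column API.
    // A NULL flag makes the comparison NULL, so the row is filtered out here as well;
    // add isNull handling if rows without datainfo should survive.
    import org.apache.spark.sql.functions.col
    df.where(col("datainfo.deletedbyinference") =!= true)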
@@ -109,7 +113,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
       .load(s"$inputPath/Works")
       .select(col("orcid"), explode(col("pids")).alias("identifier"))
       .where(
-        "identifier.schema = 'doi' or identifier.schema ='pmid' or identifier.schema ='pmc' or identifier.schema ='arxiv' or identifier.schema ='handle'"
+        "identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
       )
     val orcidPublication = orcidAuthors
       .join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))
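The chain of per-scheme OR comparisons is condensed into a single IN list over the accepted identifier schemes. The equivalent Column-API spelling, shown only as an illustration (df stands in for the works frame):

    // Illustration only, not part of the patch: same scheme filter via Column.isin,
    // keeping identifiers whose scheme is one of the accepted persistent-ID types.
    import org.apache.spark.sql.functions.col
    df.where(col("identifier.schema").isin("doi", "pmid", "pmc", "arxiv", "handle"))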