Removed unused function

Applied PR Comment of Giambattista in the PR
This commit is contained in:
Sandro La Bruzzo 2023-12-01 12:16:42 +01:00
parent cdfb7588dd
commit bf0fd27c36
2 changed files with 6 additions and 13 deletions

View File

@ -132,17 +132,6 @@ public class AuthorMerger {
.trim();
}
static int hammingDist(String str1, String str2) {
if (str1.length() != str2.length())
return Math.max(str1.length(), str2.length());
int i = 0, count = 0;
while (i < str1.length()) {
if (str1.charAt(i) != str2.charAt(i))
count++;
i++;
}
return count;
}
private static String authorFieldToBeCompared(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) {

View File

@ -2,10 +2,12 @@ package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.oa.merge.AuthorMerger
import eu.dnetlib.dhp.schema.common.ModelSupport
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
import org.apache.spark.sql.functions._
import org.apache.spark.sql._
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
@ -21,6 +23,8 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
val targetPath = parser.get("targetPath")
log.info(s"targetPath is '$targetPath'")
val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)
// ModelSupport.entityTypes.entrySet().asScala.filter(k => k.getKey.getClass isInstance(Result))
enrichResult(
spark,
s"$graphPath/publication",
@ -63,7 +67,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
.schema(enc.schema)
.json(graphPath)
.select(col("id"), col("datainfo"), col("instance"))
.where("datainfo.deletedbyinference = false")
.where("datainfo.deletedbyinference != true")
.drop("datainfo")
.withColumn("instances", explode(col("instance")))
.withColumn("pids", explode(col("instances.pid")))
@ -109,7 +113,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
.load(s"$inputPath/Works")
.select(col("orcid"), explode(col("pids")).alias("identifier"))
.where(
"identifier.schema = 'doi' or identifier.schema ='pmid' or identifier.schema ='pmc' or identifier.schema ='arxiv' or identifier.schema ='handle'"
"identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
)
val orcidPublication = orcidAuthors
.join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))