diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java index d7224af78..0153e52d5 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oa/merge/AuthorMerger.java @@ -132,17 +132,6 @@ public class AuthorMerger { .trim(); } - static int hammingDist(String str1, String str2) { - if (str1.length() != str2.length()) - return Math.max(str1.length(), str2.length()); - int i = 0, count = 0; - while (i < str1.length()) { - if (str1.charAt(i) != str2.charAt(i)) - count++; - i++; - } - return count; - } private static String authorFieldToBeCompared(Author author) { if (StringUtils.isNotBlank(author.getSurname())) { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala index 645df8181..4822059c6 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/enrich/orcid/SparkEnrichGraphWithOrcidAuthors.scala @@ -2,10 +2,12 @@ package eu.dnetlib.dhp.enrich.orcid import eu.dnetlib.dhp.application.AbstractScalaApplication import eu.dnetlib.dhp.oa.merge.AuthorMerger +import eu.dnetlib.dhp.schema.common.ModelSupport import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software} import org.apache.spark.sql.functions._ import org.apache.spark.sql._ import org.slf4j.{Logger, LoggerFactory} +import scala.collection.JavaConverters._ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger) extends AbstractScalaApplication(propertyPath, args, log: Logger) { @@ -21,6 +23,8 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String] val targetPath = parser.get("targetPath") log.info(s"targetPath is '$targetPath'") val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath) +// ModelSupport.entityTypes.entrySet().asScala.filter(k => k.getKey.getClass isInstance(Result)) + enrichResult( spark, s"$graphPath/publication", @@ -63,7 +67,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String] .schema(enc.schema) .json(graphPath) .select(col("id"), col("datainfo"), col("instance")) - .where("datainfo.deletedbyinference = false") + .where("datainfo.deletedbyinference != true") .drop("datainfo") .withColumn("instances", explode(col("instance"))) .withColumn("pids", explode(col("instances.pid"))) @@ -109,7 +113,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String] .load(s"$inputPath/Works") .select(col("orcid"), explode(col("pids")).alias("identifier")) .where( - "identifier.schema = 'doi' or identifier.schema ='pmid' or identifier.schema ='pmc' or identifier.schema ='arxiv' or identifier.schema ='handle'" + "identifier.schema IN('doi','pmid','pmc','arxiv','handle')" ) val orcidPublication = orcidAuthors .join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))