[stats wf] indicators across stats dbs & updates in the org ids #248

Closed
dimitris.pierrakos wants to merge 1742 commits from beta into beta2master_sept_2022
2 changed files with 6 additions and 13 deletions
Showing only changes of commit bf0fd27c36 - Show all commits

View File

@ -132,17 +132,6 @@ public class AuthorMerger {
.trim();
}
/**
 * Counts the character positions at which two strings differ.
 *
 * When the two strings do not have the same length no positional comparison
 * is made; the length of the longer string is returned instead.
 *
 * @param str1 the first string
 * @param str2 the second string
 * @return the number of mismatching positions for equal-length inputs,
 *         otherwise the larger of the two lengths
 */
static int hammingDist(String str1, String str2) {
    final int firstLength = str1.length();
    final int secondLength = str2.length();

    // Unequal lengths: fall back to the longer length rather than comparing.
    if (firstLength != secondLength) {
        return Math.max(firstLength, secondLength);
    }

    int mismatches = 0;
    for (int pos = 0; pos < firstLength; pos++) {
        if (str1.charAt(pos) != str2.charAt(pos)) {
            mismatches++;
        }
    }
    return mismatches;
}
private static String authorFieldToBeCompared(Author author) {
if (StringUtils.isNotBlank(author.getSurname())) {

View File

@ -2,10 +2,12 @@ package eu.dnetlib.dhp.enrich.orcid
import eu.dnetlib.dhp.application.AbstractScalaApplication
import eu.dnetlib.dhp.oa.merge.AuthorMerger
import eu.dnetlib.dhp.schema.common.ModelSupport
import eu.dnetlib.dhp.schema.oaf.{OtherResearchProduct, Publication, Result, Software}
import org.apache.spark.sql.functions._
import org.apache.spark.sql._
import org.slf4j.{Logger, LoggerFactory}
import scala.collection.JavaConverters._
class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String], log: Logger)
extends AbstractScalaApplication(propertyPath, args, log: Logger) {
@ -21,6 +23,8 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
val targetPath = parser.get("targetPath")
log.info(s"targetPath is '$targetPath'")
val orcidPublication: Dataset[Row] = generateOrcidTable(spark, orcidPath)
// ModelSupport.entityTypes.entrySet().asScala.filter(k => k.getKey.getClass isInstance(Result))
enrichResult(
spark,
s"$graphPath/publication",
@ -63,7 +67,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
.schema(enc.schema)
.json(graphPath)
.select(col("id"), col("datainfo"), col("instance"))
.where("datainfo.deletedbyinference = false")
.where("datainfo.deletedbyinference != true")
.drop("datainfo")
.withColumn("instances", explode(col("instance")))
.withColumn("pids", explode(col("instances.pid")))
@ -109,7 +113,7 @@ class SparkEnrichGraphWithOrcidAuthors(propertyPath: String, args: Array[String]
.load(s"$inputPath/Works")
.select(col("orcid"), explode(col("pids")).alias("identifier"))
.where(
"identifier.schema = 'doi' or identifier.schema ='pmid' or identifier.schema ='pmc' or identifier.schema ='arxiv' or identifier.schema ='handle'"
"identifier.schema IN('doi','pmid','pmc','arxiv','handle')"
)
val orcidPublication = orcidAuthors
.join(orcidWorks, orcidAuthors("orcid").equalTo(orcidWorks("orcid")))