diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 3e16b4b4b..7cd057cf3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -97,22 +97,23 @@ public class PrepareResultOrcidAssociationStep1 { Dataset result = readPath(spark, inputResultPath, resultClazz); result.createOrReplaceTempView("result"); - String query = " select target resultId, author authorList" - + " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author " - + " from ( " - + " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid " - + " from result " - + " lateral view explode (author) a as MyT " - + " lateral view explode (MyT.pid) p as MyP " - + " where MyP.qualifier.classid = 'ORCID') tmp " - + " group by id) r_t " - + " join (" - + " select source, target " - + " from relation " - + " where datainfo.deletedbyinference = false " - + getConstraintList(" relclass = '", allowedsemrel) - + ") rel_rel " - + " on source = id"; + String query = + "SELECT target resultId, author authorList" + + " FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author " + + " FROM ( " + + " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid " + + " FROM result " + + " LATERAL VIEW EXPLODE (author) a AS MyT " + + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " + + " WHERE MyP.qualifier.classid = 'ORCID') tmp " + + " GROUP BY id) r_t " + + " JOIN (" + + " SELECT source, target " + + " FROM relation " + + " WHERE datainfo.deletedbyinference = false " + + getConstraintList(" relclass = '", allowedsemrel) + + " ) rel_rel " + + " ON source = id"; spark .sql(query) .as(Encoders.bean(ResultOrcidList.class)) diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index ebb75a5a6..b93b66d9f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -132,16 +132,16 @@ public class SparkOrcidToResultFromSemRelJob { private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { boolean toaddpid = false; - if (StringUtils.isNoneEmpty(autoritative_author.getSurname())) { - if (StringUtils.isNoneEmpty(author.getSurname())) { + if (StringUtils.isNotEmpty(autoritative_author.getSurname())) { + if (StringUtils.isNotEmpty(author.getSurname())) { if (autoritative_author .getSurname() .trim() .equalsIgnoreCase(author.getSurname().trim())) { // have the same surname. Check the name - if (StringUtils.isNoneEmpty(autoritative_author.getName())) { - if (StringUtils.isNoneEmpty(author.getName())) { + if (StringUtils.isNotEmpty(autoritative_author.getName())) { + if (StringUtils.isNotEmpty(author.getName())) { if (autoritative_author .getName() .trim() @@ -150,12 +150,14 @@ public class SparkOrcidToResultFromSemRelJob { } // they could be differently written (i.e. only the initials of the name // in one of the two - if (autoritative_author - .getName() - .trim() - .substring(0, 0) - .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { - toaddpid = true; + else { + if (autoritative_author + .getName() + .trim() + .substring(0, 0) + .equalsIgnoreCase(author.getName().trim().substring(0, 0))) { + toaddpid = true; + } } } }