1
0
Fork 0

minor changes

This commit is contained in:
Miriam Baglioni 2020-05-14 10:34:12 +02:00
parent 43f127448d
commit 8828458acf
2 changed files with 29 additions and 26 deletions

View File

@ -97,22 +97,23 @@ public class PrepareResultOrcidAssociationStep1 {
Dataset<R> result = readPath(spark, inputResultPath, resultClazz); Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
result.createOrReplaceTempView("result"); result.createOrReplaceTempView("result");
String query = " select target resultId, author authorList" String query =
+ " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author " "SELECT target resultId, author authorList"
+ " from ( " + " FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
+ " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid " + " FROM ( "
+ " from result " + " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
+ " lateral view explode (author) a as MyT " + " FROM result "
+ " lateral view explode (MyT.pid) p as MyP " + " LATERAL VIEW EXPLODE (author) a AS MyT "
+ " where MyP.qualifier.classid = 'ORCID') tmp " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
+ " group by id) r_t " + " WHERE MyP.qualifier.classid = 'ORCID') tmp "
+ " join (" + " GROUP BY id) r_t "
+ " select source, target " + " JOIN ("
+ " from relation " + " SELECT source, target "
+ " where datainfo.deletedbyinference = false " + " FROM relation "
+ getConstraintList(" relclass = '", allowedsemrel) + " WHERE datainfo.deletedbyinference = false "
+ ") rel_rel " + getConstraintList(" relclass = '", allowedsemrel)
+ " on source = id"; + " ) rel_rel "
+ " ON source = id";
spark spark
.sql(query) .sql(query)
.as(Encoders.bean(ResultOrcidList.class)) .as(Encoders.bean(ResultOrcidList.class))

View File

@ -132,16 +132,16 @@ public class SparkOrcidToResultFromSemRelJob {
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) { private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
boolean toaddpid = false; boolean toaddpid = false;
if (StringUtils.isNoneEmpty(autoritative_author.getSurname())) { if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
if (StringUtils.isNoneEmpty(author.getSurname())) { if (StringUtils.isNotEmpty(author.getSurname())) {
if (autoritative_author if (autoritative_author
.getSurname() .getSurname()
.trim() .trim()
.equalsIgnoreCase(author.getSurname().trim())) { .equalsIgnoreCase(author.getSurname().trim())) {
// have the same surname. Check the name // have the same surname. Check the name
if (StringUtils.isNoneEmpty(autoritative_author.getName())) { if (StringUtils.isNotEmpty(autoritative_author.getName())) {
if (StringUtils.isNoneEmpty(author.getName())) { if (StringUtils.isNotEmpty(author.getName())) {
if (autoritative_author if (autoritative_author
.getName() .getName()
.trim() .trim()
@ -150,12 +150,14 @@ public class SparkOrcidToResultFromSemRelJob {
} }
// they could be differently written (i.e. only the initials of the name // they could be differently written (i.e. only the initials of the name
// in one of the two // in one of the two
if (autoritative_author else {
.getName() if (autoritative_author
.trim() .getName()
.substring(0, 0) .trim()
.equalsIgnoreCase(author.getName().trim().substring(0, 0))) { .substring(0, 0)
toaddpid = true; .equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
toaddpid = true;
}
} }
} }
} }