minor changes

This commit is contained in:
Miriam Baglioni 2020-05-14 10:34:12 +02:00
parent 43f127448d
commit 8828458acf
2 changed files with 29 additions and 26 deletions

View File

@@ -97,22 +97,23 @@ public class PrepareResultOrcidAssociationStep1 {
Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
result.createOrReplaceTempView("result");
String query = " select target resultId, author authorList"
+ " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
+ " from ( "
+ " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
+ " from result "
+ " lateral view explode (author) a as MyT "
+ " lateral view explode (MyT.pid) p as MyP "
+ " where MyP.qualifier.classid = 'ORCID') tmp "
+ " group by id) r_t "
+ " join ("
+ " select source, target "
+ " from relation "
+ " where datainfo.deletedbyinference = false "
String query =
"SELECT target resultId, author authorList"
+ " FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
+ " FROM ( "
+ " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
+ " FROM result "
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
+ " WHERE MyP.qualifier.classid = 'ORCID') tmp "
+ " GROUP BY id) r_t "
+ " JOIN ("
+ " SELECT source, target "
+ " FROM relation "
+ " WHERE datainfo.deletedbyinference = false "
+ getConstraintList(" relclass = '", allowedsemrel)
+ ") rel_rel "
+ " on source = id";
+ " ) rel_rel "
+ " ON source = id";
spark
.sql(query)
.as(Encoders.bean(ResultOrcidList.class))

View File

@@ -132,16 +132,16 @@ public class SparkOrcidToResultFromSemRelJob {
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
boolean toaddpid = false;
if (StringUtils.isNoneEmpty(autoritative_author.getSurname())) {
if (StringUtils.isNoneEmpty(author.getSurname())) {
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
if (StringUtils.isNotEmpty(author.getSurname())) {
if (autoritative_author
.getSurname()
.trim()
.equalsIgnoreCase(author.getSurname().trim())) {
// have the same surname. Check the name
if (StringUtils.isNoneEmpty(autoritative_author.getName())) {
if (StringUtils.isNoneEmpty(author.getName())) {
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
if (StringUtils.isNotEmpty(author.getName())) {
if (autoritative_author
.getName()
.trim()
@@ -150,6 +150,7 @@ public class SparkOrcidToResultFromSemRelJob {
}
// they could be written differently (e.g. only the initials of the name
// in one of the two)
else {
if (autoritative_author
.getName()
.trim()
@@ -162,6 +163,7 @@ public class SparkOrcidToResultFromSemRelJob {
}
}
}
}
if (toaddpid) {
StructuredProperty p = new StructuredProperty();
p.setValue(autoritative_author.getOrcid());