forked from D-Net/dnet-hadoop
minor changes
This commit is contained in:
parent
43f127448d
commit
8828458acf
|
@ -97,22 +97,23 @@ public class PrepareResultOrcidAssociationStep1 {
|
|||
Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
|
||||
result.createOrReplaceTempView("result");
|
||||
|
||||
String query = " select target resultId, author authorList"
|
||||
+ " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
|
||||
+ " from ( "
|
||||
+ " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
|
||||
+ " from result "
|
||||
+ " lateral view explode (author) a as MyT "
|
||||
+ " lateral view explode (MyT.pid) p as MyP "
|
||||
+ " where MyP.qualifier.classid = 'ORCID') tmp "
|
||||
+ " group by id) r_t "
|
||||
+ " join ("
|
||||
+ " select source, target "
|
||||
+ " from relation "
|
||||
+ " where datainfo.deletedbyinference = false "
|
||||
+ getConstraintList(" relclass = '", allowedsemrel)
|
||||
+ ") rel_rel "
|
||||
+ " on source = id";
|
||||
String query =
|
||||
"SELECT target resultId, author authorList"
|
||||
+ " FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
|
||||
+ " FROM ( "
|
||||
+ " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
|
||||
+ " FROM result "
|
||||
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
||||
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
||||
+ " WHERE MyP.qualifier.classid = 'ORCID') tmp "
|
||||
+ " GROUP BY id) r_t "
|
||||
+ " JOIN ("
|
||||
+ " SELECT source, target "
|
||||
+ " FROM relation "
|
||||
+ " WHERE datainfo.deletedbyinference = false "
|
||||
+ getConstraintList(" relclass = '", allowedsemrel)
|
||||
+ " ) rel_rel "
|
||||
+ " ON source = id";
|
||||
spark
|
||||
.sql(query)
|
||||
.as(Encoders.bean(ResultOrcidList.class))
|
||||
|
|
|
@ -132,16 +132,16 @@ public class SparkOrcidToResultFromSemRelJob {
|
|||
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
|
||||
boolean toaddpid = false;
|
||||
|
||||
if (StringUtils.isNoneEmpty(autoritative_author.getSurname())) {
|
||||
if (StringUtils.isNoneEmpty(author.getSurname())) {
|
||||
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
|
||||
if (StringUtils.isNotEmpty(author.getSurname())) {
|
||||
if (autoritative_author
|
||||
.getSurname()
|
||||
.trim()
|
||||
.equalsIgnoreCase(author.getSurname().trim())) {
|
||||
|
||||
// have the same surname. Check the name
|
||||
if (StringUtils.isNoneEmpty(autoritative_author.getName())) {
|
||||
if (StringUtils.isNoneEmpty(author.getName())) {
|
||||
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
|
||||
if (StringUtils.isNotEmpty(author.getName())) {
|
||||
if (autoritative_author
|
||||
.getName()
|
||||
.trim()
|
||||
|
@ -150,12 +150,14 @@ public class SparkOrcidToResultFromSemRelJob {
|
|||
}
|
||||
// they could be written differently (e.g. only the initials of the name
|
||||
// appear in one of the two)
|
||||
if (autoritative_author
|
||||
.getName()
|
||||
.trim()
|
||||
.substring(0, 0)
|
||||
.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
|
||||
toaddpid = true;
|
||||
else {
|
||||
if (autoritative_author
|
||||
.getName()
|
||||
.trim()
|
||||
.substring(0, 0)
|
||||
.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
|
||||
toaddpid = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue