forked from D-Net/dnet-hadoop
minor changes
This commit is contained in:
parent
43f127448d
commit
8828458acf
|
@ -97,22 +97,23 @@ public class PrepareResultOrcidAssociationStep1 {
|
||||||
Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
|
Dataset<R> result = readPath(spark, inputResultPath, resultClazz);
|
||||||
result.createOrReplaceTempView("result");
|
result.createOrReplaceTempView("result");
|
||||||
|
|
||||||
String query = " select target resultId, author authorList"
|
String query =
|
||||||
+ " from (select id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
|
"SELECT target resultId, author authorList"
|
||||||
+ " from ( "
|
+ " FROM (SELECT id, collect_set(named_struct('name', name, 'surname', surname, 'fullname', fullname, 'orcid', orcid)) author "
|
||||||
+ " select id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
|
+ " FROM ( "
|
||||||
+ " from result "
|
+ " SELECT DISTINCT id, MyT.fullname, MyT.name, MyT.surname, MyP.value orcid "
|
||||||
+ " lateral view explode (author) a as MyT "
|
+ " FROM result "
|
||||||
+ " lateral view explode (MyT.pid) p as MyP "
|
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
||||||
+ " where MyP.qualifier.classid = 'ORCID') tmp "
|
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
||||||
+ " group by id) r_t "
|
+ " WHERE MyP.qualifier.classid = 'ORCID') tmp "
|
||||||
+ " join ("
|
+ " GROUP BY id) r_t "
|
||||||
+ " select source, target "
|
+ " JOIN ("
|
||||||
+ " from relation "
|
+ " SELECT source, target "
|
||||||
+ " where datainfo.deletedbyinference = false "
|
+ " FROM relation "
|
||||||
+ getConstraintList(" relclass = '", allowedsemrel)
|
+ " WHERE datainfo.deletedbyinference = false "
|
||||||
+ ") rel_rel "
|
+ getConstraintList(" relclass = '", allowedsemrel)
|
||||||
+ " on source = id";
|
+ " ) rel_rel "
|
||||||
|
+ " ON source = id";
|
||||||
spark
|
spark
|
||||||
.sql(query)
|
.sql(query)
|
||||||
.as(Encoders.bean(ResultOrcidList.class))
|
.as(Encoders.bean(ResultOrcidList.class))
|
||||||
|
|
|
@ -132,16 +132,16 @@ public class SparkOrcidToResultFromSemRelJob {
|
||||||
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
|
private static boolean enrichAuthor(AutoritativeAuthor autoritative_author, Author author) {
|
||||||
boolean toaddpid = false;
|
boolean toaddpid = false;
|
||||||
|
|
||||||
if (StringUtils.isNoneEmpty(autoritative_author.getSurname())) {
|
if (StringUtils.isNotEmpty(autoritative_author.getSurname())) {
|
||||||
if (StringUtils.isNoneEmpty(author.getSurname())) {
|
if (StringUtils.isNotEmpty(author.getSurname())) {
|
||||||
if (autoritative_author
|
if (autoritative_author
|
||||||
.getSurname()
|
.getSurname()
|
||||||
.trim()
|
.trim()
|
||||||
.equalsIgnoreCase(author.getSurname().trim())) {
|
.equalsIgnoreCase(author.getSurname().trim())) {
|
||||||
|
|
||||||
// have the same surname. Check the name
|
// have the same surname. Check the name
|
||||||
if (StringUtils.isNoneEmpty(autoritative_author.getName())) {
|
if (StringUtils.isNotEmpty(autoritative_author.getName())) {
|
||||||
if (StringUtils.isNoneEmpty(author.getName())) {
|
if (StringUtils.isNotEmpty(author.getName())) {
|
||||||
if (autoritative_author
|
if (autoritative_author
|
||||||
.getName()
|
.getName()
|
||||||
.trim()
|
.trim()
|
||||||
|
@ -150,12 +150,14 @@ public class SparkOrcidToResultFromSemRelJob {
|
||||||
}
|
}
|
||||||
// they could be differently written (i.e. only the initials of the name
|
// they could be differently written (i.e. only the initials of the name
|
||||||
// in one of the two)
|
// in one of the two)
|
||||||
if (autoritative_author
|
else {
|
||||||
.getName()
|
if (autoritative_author
|
||||||
.trim()
|
.getName()
|
||||||
.substring(0, 0)
|
.trim()
|
||||||
.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
|
.substring(0, 0)
|
||||||
toaddpid = true;
|
.equalsIgnoreCase(author.getName().trim().substring(0, 0))) {
|
||||||
|
toaddpid = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue