relclass for relations and the corresponding values are now compared in lower case (isSupplementedBy was written as IsSupplementedBy - orcid propagation)

This commit is contained in:
Miriam Baglioni 2020-08-18 16:42:08 +02:00
parent f44dd5d886
commit 55e24c2547
7 changed files with 20 additions and 39 deletions

View File

@ -109,9 +109,9 @@ public class PropagationConstant {
} }
public static String getConstraintList(String text, List<String> constraints) { public static String getConstraintList(String text, List<String> constraints) {
String ret = " and (" + text + constraints.get(0) + "'"; String ret = " and (" + text + constraints.get(0).toLowerCase() + "'";
for (int i = 1; i < constraints.size(); i++) { for (int i = 1; i < constraints.size(); i++) {
ret += " OR " + text + constraints.get(i) + "'"; ret += " OR " + text + constraints.get(i).toLowerCase() + "'";
} }
ret += ")"; ret += ")";
return ret; return ret;

View File

@ -96,27 +96,6 @@ public class PrepareDatasourceCountryAssociation {
relation.createOrReplaceTempView("relation"); relation.createOrReplaceTempView("relation");
organization.createOrReplaceTempView("organization"); organization.createOrReplaceTempView("organization");
// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
// + "FROM ( SELECT id "
// + " FROM datasource "
// + " WHERE (datainfo.deletedbyinference = false "
// + whitelisted
// + ") "
// + getConstraintList("datasourcetype.classid = '", allowedtypes)
// + ") d "
// + "JOIN ( SELECT source, target "
// + " FROM relation "
// + " WHERE relclass = '"
// + ModelConstants.IS_PROVIDED_BY
// + "' "
// + " AND datainfo.deletedbyinference = false ) rel "
// + "ON d.id = rel.source "
// + "JOIN (SELECT id, country "
// + " FROM organization "
// + " WHERE datainfo.deletedbyinference = false "
// + " AND length(country.classid) > 0) o "
// + "ON o.id = rel.target";
String query = "SELECT source dataSourceId, " + String query = "SELECT source dataSourceId, " +
"named_struct('classid', country.classid, 'classname', country.classname) country " + "named_struct('classid', country.classid, 'classname', country.classname) country " +
"FROM datasource d " + "FROM datasource d " +
@ -125,7 +104,7 @@ public class PrepareDatasourceCountryAssociation {
"JOIN organization o " + "JOIN organization o " +
"ON o.id = rel.target " + "ON o.id = rel.target " +
"WHERE rel.datainfo.deletedbyinference = false " + "WHERE rel.datainfo.deletedbyinference = false " +
"and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" + "and lower(rel.relclass) = '" + ModelConstants.IS_PROVIDED_BY.toLowerCase() + "'" +
"and o.datainfo.deletedbyinference = false " + "and o.datainfo.deletedbyinference = false " +
"and length(o.country.classid) > 0 " + "and length(o.country.classid) > 0 " +
"and (" + allowed + " or " + whitelisted + ")"; "and (" + allowed + " or " + whitelisted + ")";

View File

@ -102,15 +102,17 @@ public class PrepareResultOrcidAssociationStep1 {
+ " FROM result " + " FROM result "
+ " LATERAL VIEW EXPLODE (author) a AS MyT " + " LATERAL VIEW EXPLODE (author) a AS MyT "
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP " + " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
+ " WHERE MyP.qualifier.classid = 'ORCID') tmp " + " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp "
+ " GROUP BY id) r_t " + " GROUP BY id) r_t "
+ " JOIN (" + " JOIN ("
+ " SELECT source, target " + " SELECT source, target "
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ getConstraintList(" relclass = '", allowedsemrel) + getConstraintList(" lower(relclass) = '", allowedsemrel)
+ " ) rel_rel " + " ) rel_rel "
+ " ON source = id"; + " ON source = id";
log.info("executedQuery: {}", query);
spark spark
.sql(query) .sql(query)
.as(Encoders.bean(ResultOrcidList.class)) .as(Encoders.bean(ResultOrcidList.class))

View File

@ -85,8 +85,8 @@ public class PrepareProjectResultsAssociation {
String resproj_relation_query = "SELECT source, target " String resproj_relation_query = "SELECT source, target "
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ " AND relClass = '" + " AND lower(relClass) = '"
+ ModelConstants.IS_PRODUCED_BY + ModelConstants.IS_PRODUCED_BY.toLowerCase()
+ "'"; + "'";
Dataset<Row> resproj_relation = spark.sql(resproj_relation_query); Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
@ -98,7 +98,7 @@ public class PrepareProjectResultsAssociation {
+ " FROM (SELECT source, target " + " FROM (SELECT source, target "
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ getConstraintList(" relClass = '", allowedsemrel) + getConstraintList(" lower(relClass) = '", allowedsemrel)
+ " ) r1" + " ) r1"
+ " JOIN resproj_relation r2 " + " JOIN resproj_relation r2 "
+ " ON r1.source = r2.source " + " ON r1.source = r2.source "

View File

@ -76,14 +76,14 @@ public class PrepareResultCommunitySet {
+ "FROM (SELECT source, target " + "FROM (SELECT source, target "
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ " AND relClass = '" + " AND lower(relClass) = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
+ "') result_organization " + "') result_organization "
+ "LEFT JOIN (SELECT source, collect_set(target) org_set " + "LEFT JOIN (SELECT source, collect_set(target) org_set "
+ " FROM relation " + " FROM relation "
+ " WHERE datainfo.deletedbyinference = false " + " WHERE datainfo.deletedbyinference = false "
+ " AND relClass = '" + " AND lower(relClass) = '"
+ ModelConstants.MERGES + ModelConstants.MERGES.toLowerCase()
+ "' " + "' "
+ " GROUP BY source) organization_organization " + " GROUP BY source) organization_organization "
+ "ON result_organization.target = organization_organization.source "; + "ON result_organization.target = organization_organization.source ";

View File

@ -144,8 +144,8 @@ public class PrepareResultCommunitySetStep1 {
String resultContextQuery = String String resultContextQuery = String
.format( .format(
RESULT_CONTEXT_QUERY_TEMPLATE, RESULT_CONTEXT_QUERY_TEMPLATE,
getConstraintList(" co.id = '", communityIdList), getConstraintList(" lower(co.id) = '", communityIdList),
getConstraintList(" relClass = '", allowedsemrel)); getConstraintList(" lower(relClass) = '", allowedsemrel));
Dataset<Row> result_context = spark.sql(resultContextQuery); Dataset<Row> result_context = spark.sql(resultContextQuery);
result_context.createOrReplaceTempView("result_context"); result_context.createOrReplaceTempView("result_context");

View File

@ -91,8 +91,8 @@ public class PrepareResultInstRepoAssociation {
+ "AND datainfo.deletedbyinference = false ) d " + "AND datainfo.deletedbyinference = false ) d "
+ "JOIN ( SELECT source, target " + "JOIN ( SELECT source, target "
+ "FROM relation " + "FROM relation "
+ "WHERE relclass = '" + "WHERE lower(relclass) = '"
+ ModelConstants.IS_PROVIDED_BY + ModelConstants.IS_PROVIDED_BY.toLowerCase()
+ "' " + "' "
+ "AND datainfo.deletedbyinference = false ) rel " + "AND datainfo.deletedbyinference = false ) rel "
+ "ON d.id = rel.source "; + "ON d.id = rel.source ";
@ -111,8 +111,8 @@ public class PrepareResultInstRepoAssociation {
String query = "Select source resultId, collect_set(target) organizationSet " String query = "Select source resultId, collect_set(target) organizationSet "
+ "from relation " + "from relation "
+ "where datainfo.deletedbyinference = false " + "where datainfo.deletedbyinference = false "
+ "and relClass = '" + "and lower(relClass) = '"
+ ModelConstants.HAS_AUTHOR_INSTITUTION + ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
+ "' " + "' "
+ "group by source"; + "group by source";