forked from D-Net/dnet-hadoop
merge branch with master
This commit is contained in:
commit
85203c16e3
|
@ -109,9 +109,9 @@ public class PropagationConstant {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getConstraintList(String text, List<String> constraints) {
|
public static String getConstraintList(String text, List<String> constraints) {
|
||||||
String ret = " and (" + text + constraints.get(0) + "'";
|
String ret = " and (" + text + constraints.get(0).toLowerCase() + "'";
|
||||||
for (int i = 1; i < constraints.size(); i++) {
|
for (int i = 1; i < constraints.size(); i++) {
|
||||||
ret += " OR " + text + constraints.get(i) + "'";
|
ret += " OR " + text + constraints.get(i).toLowerCase() + "'";
|
||||||
}
|
}
|
||||||
ret += ")";
|
ret += ")";
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
@ -96,27 +96,6 @@ public class PrepareDatasourceCountryAssociation {
|
||||||
relation.createOrReplaceTempView("relation");
|
relation.createOrReplaceTempView("relation");
|
||||||
organization.createOrReplaceTempView("organization");
|
organization.createOrReplaceTempView("organization");
|
||||||
|
|
||||||
// String query = "SELECT source dataSourceId, named_struct('classid', country.classid, 'classname', country.classname) country "
|
|
||||||
// + "FROM ( SELECT id "
|
|
||||||
// + " FROM datasource "
|
|
||||||
// + " WHERE (datainfo.deletedbyinference = false "
|
|
||||||
// + whitelisted
|
|
||||||
// + ") "
|
|
||||||
// + getConstraintList("datasourcetype.classid = '", allowedtypes)
|
|
||||||
// + ") d "
|
|
||||||
// + "JOIN ( SELECT source, target "
|
|
||||||
// + " FROM relation "
|
|
||||||
// + " WHERE relclass = '"
|
|
||||||
// + ModelConstants.IS_PROVIDED_BY
|
|
||||||
// + "' "
|
|
||||||
// + " AND datainfo.deletedbyinference = false ) rel "
|
|
||||||
// + "ON d.id = rel.source "
|
|
||||||
// + "JOIN (SELECT id, country "
|
|
||||||
// + " FROM organization "
|
|
||||||
// + " WHERE datainfo.deletedbyinference = false "
|
|
||||||
// + " AND length(country.classid) > 0) o "
|
|
||||||
// + "ON o.id = rel.target";
|
|
||||||
|
|
||||||
String query = "SELECT source dataSourceId, " +
|
String query = "SELECT source dataSourceId, " +
|
||||||
"named_struct('classid', country.classid, 'classname', country.classname) country " +
|
"named_struct('classid', country.classid, 'classname', country.classname) country " +
|
||||||
"FROM datasource d " +
|
"FROM datasource d " +
|
||||||
|
@ -125,7 +104,7 @@ public class PrepareDatasourceCountryAssociation {
|
||||||
"JOIN organization o " +
|
"JOIN organization o " +
|
||||||
"ON o.id = rel.target " +
|
"ON o.id = rel.target " +
|
||||||
"WHERE rel.datainfo.deletedbyinference = false " +
|
"WHERE rel.datainfo.deletedbyinference = false " +
|
||||||
"and rel.relclass = '" + ModelConstants.IS_PROVIDED_BY + "'" +
|
"and lower(rel.relclass) = '" + ModelConstants.IS_PROVIDED_BY.toLowerCase() + "'" +
|
||||||
"and o.datainfo.deletedbyinference = false " +
|
"and o.datainfo.deletedbyinference = false " +
|
||||||
"and length(o.country.classid) > 0 " +
|
"and length(o.country.classid) > 0 " +
|
||||||
"and (" + allowed + " or " + whitelisted + ")";
|
"and (" + allowed + " or " + whitelisted + ")";
|
||||||
|
|
|
@ -102,15 +102,17 @@ public class PrepareResultOrcidAssociationStep1 {
|
||||||
+ " FROM result "
|
+ " FROM result "
|
||||||
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
+ " LATERAL VIEW EXPLODE (author) a AS MyT "
|
||||||
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
+ " LATERAL VIEW EXPLODE (MyT.pid) p AS MyP "
|
||||||
+ " WHERE MyP.qualifier.classid = 'ORCID') tmp "
|
+ " WHERE lower(MyP.qualifier.classid) = 'orcid') tmp "
|
||||||
+ " GROUP BY id) r_t "
|
+ " GROUP BY id) r_t "
|
||||||
+ " JOIN ("
|
+ " JOIN ("
|
||||||
+ " SELECT source, target "
|
+ " SELECT source, target "
|
||||||
+ " FROM relation "
|
+ " FROM relation "
|
||||||
+ " WHERE datainfo.deletedbyinference = false "
|
+ " WHERE datainfo.deletedbyinference = false "
|
||||||
+ getConstraintList(" relclass = '", allowedsemrel)
|
+ getConstraintList(" lower(relclass) = '", allowedsemrel)
|
||||||
+ " ) rel_rel "
|
+ " ) rel_rel "
|
||||||
+ " ON source = id";
|
+ " ON source = id";
|
||||||
|
|
||||||
|
log.info("executedQuery: {}", query);
|
||||||
spark
|
spark
|
||||||
.sql(query)
|
.sql(query)
|
||||||
.as(Encoders.bean(ResultOrcidList.class))
|
.as(Encoders.bean(ResultOrcidList.class))
|
||||||
|
|
|
@ -85,8 +85,8 @@ public class PrepareProjectResultsAssociation {
|
||||||
String resproj_relation_query = "SELECT source, target "
|
String resproj_relation_query = "SELECT source, target "
|
||||||
+ " FROM relation "
|
+ " FROM relation "
|
||||||
+ " WHERE datainfo.deletedbyinference = false "
|
+ " WHERE datainfo.deletedbyinference = false "
|
||||||
+ " AND relClass = '"
|
+ " AND lower(relClass) = '"
|
||||||
+ ModelConstants.IS_PRODUCED_BY
|
+ ModelConstants.IS_PRODUCED_BY.toLowerCase()
|
||||||
+ "'";
|
+ "'";
|
||||||
|
|
||||||
Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
|
Dataset<Row> resproj_relation = spark.sql(resproj_relation_query);
|
||||||
|
@ -98,7 +98,7 @@ public class PrepareProjectResultsAssociation {
|
||||||
+ " FROM (SELECT source, target "
|
+ " FROM (SELECT source, target "
|
||||||
+ " FROM relation "
|
+ " FROM relation "
|
||||||
+ " WHERE datainfo.deletedbyinference = false "
|
+ " WHERE datainfo.deletedbyinference = false "
|
||||||
+ getConstraintList(" relClass = '", allowedsemrel)
|
+ getConstraintList(" lower(relClass) = '", allowedsemrel)
|
||||||
+ " ) r1"
|
+ " ) r1"
|
||||||
+ " JOIN resproj_relation r2 "
|
+ " JOIN resproj_relation r2 "
|
||||||
+ " ON r1.source = r2.source "
|
+ " ON r1.source = r2.source "
|
||||||
|
|
|
@ -76,14 +76,14 @@ public class PrepareResultCommunitySet {
|
||||||
+ "FROM (SELECT source, target "
|
+ "FROM (SELECT source, target "
|
||||||
+ " FROM relation "
|
+ " FROM relation "
|
||||||
+ " WHERE datainfo.deletedbyinference = false "
|
+ " WHERE datainfo.deletedbyinference = false "
|
||||||
+ " AND relClass = '"
|
+ " AND lower(relClass) = '"
|
||||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION
|
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
|
||||||
+ "') result_organization "
|
+ "') result_organization "
|
||||||
+ "LEFT JOIN (SELECT source, collect_set(target) org_set "
|
+ "LEFT JOIN (SELECT source, collect_set(target) org_set "
|
||||||
+ " FROM relation "
|
+ " FROM relation "
|
||||||
+ " WHERE datainfo.deletedbyinference = false "
|
+ " WHERE datainfo.deletedbyinference = false "
|
||||||
+ " AND relClass = '"
|
+ " AND lower(relClass) = '"
|
||||||
+ ModelConstants.MERGES
|
+ ModelConstants.MERGES.toLowerCase()
|
||||||
+ "' "
|
+ "' "
|
||||||
+ " GROUP BY source) organization_organization "
|
+ " GROUP BY source) organization_organization "
|
||||||
+ "ON result_organization.target = organization_organization.source ";
|
+ "ON result_organization.target = organization_organization.source ";
|
||||||
|
|
|
@ -144,8 +144,8 @@ public class PrepareResultCommunitySetStep1 {
|
||||||
String resultContextQuery = String
|
String resultContextQuery = String
|
||||||
.format(
|
.format(
|
||||||
RESULT_CONTEXT_QUERY_TEMPLATE,
|
RESULT_CONTEXT_QUERY_TEMPLATE,
|
||||||
getConstraintList(" co.id = '", communityIdList),
|
getConstraintList(" lower(co.id) = '", communityIdList),
|
||||||
getConstraintList(" relClass = '", allowedsemrel));
|
getConstraintList(" lower(relClass) = '", allowedsemrel));
|
||||||
|
|
||||||
Dataset<Row> result_context = spark.sql(resultContextQuery);
|
Dataset<Row> result_context = spark.sql(resultContextQuery);
|
||||||
result_context.createOrReplaceTempView("result_context");
|
result_context.createOrReplaceTempView("result_context");
|
||||||
|
|
|
@ -91,8 +91,8 @@ public class PrepareResultInstRepoAssociation {
|
||||||
+ "AND datainfo.deletedbyinference = false ) d "
|
+ "AND datainfo.deletedbyinference = false ) d "
|
||||||
+ "JOIN ( SELECT source, target "
|
+ "JOIN ( SELECT source, target "
|
||||||
+ "FROM relation "
|
+ "FROM relation "
|
||||||
+ "WHERE relclass = '"
|
+ "WHERE lower(relclass) = '"
|
||||||
+ ModelConstants.IS_PROVIDED_BY
|
+ ModelConstants.IS_PROVIDED_BY.toLowerCase()
|
||||||
+ "' "
|
+ "' "
|
||||||
+ "AND datainfo.deletedbyinference = false ) rel "
|
+ "AND datainfo.deletedbyinference = false ) rel "
|
||||||
+ "ON d.id = rel.source ";
|
+ "ON d.id = rel.source ";
|
||||||
|
@ -111,8 +111,8 @@ public class PrepareResultInstRepoAssociation {
|
||||||
String query = "Select source resultId, collect_set(target) organizationSet "
|
String query = "Select source resultId, collect_set(target) organizationSet "
|
||||||
+ "from relation "
|
+ "from relation "
|
||||||
+ "where datainfo.deletedbyinference = false "
|
+ "where datainfo.deletedbyinference = false "
|
||||||
+ "and relClass = '"
|
+ "and lower(relClass) = '"
|
||||||
+ ModelConstants.HAS_AUTHOR_INSTITUTION
|
+ ModelConstants.HAS_AUTHOR_INSTITUTION.toLowerCase()
|
||||||
+ "' "
|
+ "' "
|
||||||
+ "group by source";
|
+ "group by source";
|
||||||
|
|
||||||
|
|
|
@ -266,7 +266,6 @@
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -298,7 +297,6 @@
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -330,7 +328,6 @@
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
@ -362,7 +359,6 @@
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
<arg>--outputPath</arg><arg>${outputPath}/software</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
|
|
|
@ -176,7 +176,6 @@
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
@ -206,7 +205,6 @@
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
@ -236,7 +234,6 @@
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
@ -266,7 +263,6 @@
|
||||||
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
<arg>--datasourceOrganizationPath</arg><arg>${workingDir}/preparedInfo/datasourceOrganization</arg>
|
||||||
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
<arg>--alreadyLinkedPath</arg><arg>${workingDir}/preparedInfo/alreadyLinked</arg>
|
||||||
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
<arg>--hive_metastore_uris</arg><arg>${hive_metastore_uris}</arg>
|
||||||
<arg>--saveGraph</arg><arg>${saveGraph}</arg>
|
|
||||||
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
|
||||||
</spark>
|
</spark>
|
||||||
<ok to="wait2"/>
|
<ok to="wait2"/>
|
||||||
|
|
|
@ -8,6 +8,8 @@ import java.io.StringReader;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.Funder;
|
||||||
|
import eu.dnetlib.dhp.schema.dump.oaf.graph.Project;
|
||||||
import org.apache.spark.SparkConf;
|
import org.apache.spark.SparkConf;
|
||||||
import org.apache.spark.api.java.function.MapFunction;
|
import org.apache.spark.api.java.function.MapFunction;
|
||||||
import org.apache.spark.sql.Encoders;
|
import org.apache.spark.sql.Encoders;
|
||||||
|
|
|
@ -46,7 +46,7 @@ object DLIToOAF {
|
||||||
"IsReferencedBy" -> ("isRelatedTo", "relationship"),
|
"IsReferencedBy" -> ("isRelatedTo", "relationship"),
|
||||||
"References" -> ("isRelatedTo", "relationship"),
|
"References" -> ("isRelatedTo", "relationship"),
|
||||||
"IsRelatedTo" -> ("isRelatedTo", "relationship"),
|
"IsRelatedTo" -> ("isRelatedTo", "relationship"),
|
||||||
"IsSupplementedBy" -> ("IsSupplementedBy", "supplement"),
|
"IsSupplementedBy" -> ("isSupplementedBy", "supplement"),
|
||||||
"Cites" -> ("cites", "citation"),
|
"Cites" -> ("cites", "citation"),
|
||||||
"Unknown" -> ("isRelatedTo", "relationship"),
|
"Unknown" -> ("isRelatedTo", "relationship"),
|
||||||
"IsSourceOf" -> ("isRelatedTo", "relationship"),
|
"IsSourceOf" -> ("isRelatedTo", "relationship"),
|
||||||
|
|
|
@ -597,6 +597,12 @@
|
||||||
|
|
||||||
<action name="drop_solr_collection">
|
<action name="drop_solr_collection">
|
||||||
<java>
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
<main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
|
<main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
<arg>--format</arg><arg>${format}</arg>
|
<arg>--format</arg><arg>${format}</arg>
|
||||||
|
@ -639,6 +645,12 @@
|
||||||
|
|
||||||
<action name="commit_solr_collection">
|
<action name="commit_solr_collection">
|
||||||
<java>
|
<java>
|
||||||
|
<configuration>
|
||||||
|
<property>
|
||||||
|
<name>oozie.launcher.mapreduce.user.classpath.first</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
</configuration>
|
||||||
<main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
|
<main-class>eu.dnetlib.dhp.oa.provision.SolrAdminApplication</main-class>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
<arg>--format</arg><arg>${format}</arg>
|
<arg>--format</arg><arg>${format}</arg>
|
||||||
|
|
Loading…
Reference in New Issue