forked from D-Net/dnet-hadoop
Merge branch 'master' of code-repo.d4science.org:D-Net/dnet-hadoop
This commit is contained in:
commit
201d79021e
|
@ -6,7 +6,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-build</artifactId>
|
<artifactId>dhp-build</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>dhp-build-assembly-resources</artifactId>
|
<artifactId>dhp-build-assembly-resources</artifactId>
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-build</artifactId>
|
<artifactId>dhp-build</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<artifactId>dhp-build-properties-maven-plugin</artifactId>
|
<artifactId>dhp-build-properties-maven-plugin</artifactId>
|
||||||
|
@ -102,7 +102,7 @@
|
||||||
</goals>
|
</goals>
|
||||||
</pluginExecutionFilter>
|
</pluginExecutionFilter>
|
||||||
<action>
|
<action>
|
||||||
<ignore></ignore>
|
<ignore />
|
||||||
</action>
|
</action>
|
||||||
</pluginExecution>
|
</pluginExecution>
|
||||||
</pluginExecutions>
|
</pluginExecutions>
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>dhp-build</artifactId>
|
<artifactId>dhp-build</artifactId>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
<relativePath>../</relativePath>
|
<relativePath>../</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
<relativePath>../</relativePath>
|
<relativePath>../</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<artifactId>dhp-aggregation</artifactId>
|
<artifactId>dhp-aggregation</artifactId>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<artifactId>dhp-dedup-openaire</artifactId>
|
<artifactId>dhp-dedup-openaire</artifactId>
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
----------------------------------------------------------------
|
|
||||||
Thu Mar 26 19:43:00 CET 2020:
|
|
||||||
Booting Derby version The Apache Software Foundation - Apache Derby - 10.12.1.1 - (1704137): instance a816c00e-0171-1827-9724-000012c70f40
|
|
||||||
on database directory /private/var/folders/xn/nr5vdk8n1572rvrnx5890_d80000gn/T/junit3871072562876431144/junit_metastore_db with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@4e6b5ed4
|
|
||||||
Loaded from file:/Users/claudio/.m2/repository/org/apache/derby/derby/10.12.1.1/derby-10.12.1.1.jar
|
|
||||||
java.vendor=Oracle Corporation
|
|
||||||
java.runtime.version=1.8.0_181-b13
|
|
||||||
user.dir=/Users/claudio/workspace/git/dnet-hadoop/dhp-workflows/dhp-graph-mapper
|
|
||||||
os.name=Mac OS X
|
|
||||||
os.arch=x86_64
|
|
||||||
os.version=10.15.3
|
|
||||||
derby.system.home=null
|
|
||||||
Database Class Loader started - derby.database.classpath=''
|
|
|
@ -1,10 +1,9 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -1,14 +0,0 @@
|
||||||
sparkExecutorCoresForJoining=1
|
|
||||||
sparkDriverMemoryForJoining=10G
|
|
||||||
sparkExecutorMemoryForJoining=15G
|
|
||||||
sparkExecutorCoresForIndexing=64
|
|
||||||
sparkDriverMemoryForIndexing=3G
|
|
||||||
sparkExecutorMemoryForIndexing=2G
|
|
||||||
#isLookupUrl=http://services.openaire.eu:8280/is/services/isLookUp
|
|
||||||
isLookupUrl=http://beta.services.openaire.eu:8280/is/services/isLookUp?wsdl
|
|
||||||
sourcePath=/tmp/db_openaireplus_services.export_dhp.2020.02.03
|
|
||||||
outputPath=/tmp/openaire_provision
|
|
||||||
format=TMF
|
|
||||||
batchSize=2000
|
|
||||||
reuseRecords=false
|
|
||||||
otherDsTypeId=scholarcomminfra, infospace, pubsrepository::mock, entityregistry, entityregistry::projects, entityregistry::repositories, websource
|
|
|
@ -3,7 +3,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<artifactId>dhp-workflows</artifactId>
|
<artifactId>dhp-workflows</artifactId>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
|
|
@ -29,14 +29,9 @@ public class SparkXmlRecordBuilderJob {
|
||||||
final String otherDsTypeId = parser.get("otherDsTypeId");
|
final String otherDsTypeId = parser.get("otherDsTypeId");
|
||||||
|
|
||||||
final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
|
final FileSystem fs = FileSystem.get(spark.sparkContext().hadoopConfiguration());
|
||||||
if (fs.exists(new Path(outputPath))) {
|
|
||||||
fs.delete(new Path(outputPath), true);
|
|
||||||
fs.mkdirs(new Path(outputPath));
|
|
||||||
}
|
|
||||||
|
|
||||||
new GraphJoiner(spark, ContextMapper.fromIS(isLookupUrl), otherDsTypeId, inputPath, outputPath)
|
new GraphJoiner(spark, ContextMapper.fromIS(isLookupUrl), otherDsTypeId, inputPath, outputPath)
|
||||||
.adjacencyLists();
|
.adjacencyLists();
|
||||||
//.asXML();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,17 +7,17 @@ import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
public class TemplateResources {
|
public class TemplateResources {
|
||||||
|
|
||||||
private String record = read("eu/dnetlib/dhp/graph/template/record.st");
|
private String record = read("eu/dnetlib/dhp/oa/provision/template/record.st");
|
||||||
|
|
||||||
private String instance = read("eu/dnetlib/dhp/graph/template/instance.st");
|
private String instance = read("eu/dnetlib/dhp/oa/provision/template/instance.st");
|
||||||
|
|
||||||
private String rel = read("eu/dnetlib/dhp/graph/template/rel.st");
|
private String rel = read("eu/dnetlib/dhp/oa/provision/template/rel.st");
|
||||||
|
|
||||||
private String webresource = read("eu/dnetlib/dhp/graph/template/webresource.st");
|
private String webresource = read("eu/dnetlib/dhp/oa/provision/template/webresource.st");
|
||||||
|
|
||||||
private String child = read("eu/dnetlib/dhp/graph/template/child.st");
|
private String child = read("eu/dnetlib/dhp/oa/provision/template/child.st");
|
||||||
|
|
||||||
private String entity = read("eu/dnetlib/dhp/graph/template/entity.st");
|
private String entity = read("eu/dnetlib/dhp/oa/provision/template/entity.st");
|
||||||
|
|
||||||
private static String read(final String classpathResource) throws IOException {
|
private static String read(final String classpathResource) throws IOException {
|
||||||
return Resources.toString(Resources.getResource(classpathResource), StandardCharsets.UTF_8);
|
return Resources.toString(Resources.getResource(classpathResource), StandardCharsets.UTF_8);
|
||||||
|
|
|
@ -58,6 +58,10 @@
|
||||||
|
|
||||||
<action name="adjancency_lists">
|
<action name="adjancency_lists">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<prepare>
|
||||||
|
<delete path="${outputPath}"/>
|
||||||
|
<mkdir path="${outputPath}"/>
|
||||||
|
</prepare>
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
<mode>cluster</mode>
|
<mode>cluster</mode>
|
||||||
<name>build_adjacency_lists</name>
|
<name>build_adjacency_lists</name>
|
||||||
|
@ -67,7 +71,6 @@
|
||||||
--executor-cores ${sparkExecutorCoresForJoining}
|
--executor-cores ${sparkExecutorCoresForJoining}
|
||||||
--executor-memory ${sparkExecutorMemoryForJoining}
|
--executor-memory ${sparkExecutorMemoryForJoining}
|
||||||
--driver-memory=${sparkDriverMemoryForJoining}
|
--driver-memory=${sparkDriverMemoryForJoining}
|
||||||
--conf spark.dynamicAllocation.maxExecutors=${sparkExecutorCoresForJoining}
|
|
||||||
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
|
--conf spark.extraListeners="com.cloudera.spark.lineage.NavigatorAppListener"
|
||||||
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
|
--conf spark.sql.queryExecutionListeners="com.cloudera.spark.lineage.NavigatorQueryListener"
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
<relativePath>../</relativePath>
|
<relativePath>../</relativePath>
|
||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
|
|
6
pom.xml
6
pom.xml
|
@ -1,11 +1,9 @@
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
|
||||||
|
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>eu.dnetlib.dhp</groupId>
|
<groupId>eu.dnetlib.dhp</groupId>
|
||||||
<artifactId>dhp</artifactId>
|
<artifactId>dhp</artifactId>
|
||||||
<version>1.1.6-SNAPSHOT</version>
|
<version>1.1.7-SNAPSHOT</version>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
|
|
||||||
<url>http://www.d-net.research-infrastructures.eu</url>
|
<url>http://www.d-net.research-infrastructures.eu</url>
|
||||||
|
|
Loading…
Reference in New Issue