diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java index 45e712c7e..44870c0f8 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java @@ -46,7 +46,7 @@ public class PrepareAffiliationRelations implements Serializable { private static final String ID_PREFIX = "50|doi_________::"; public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference"; public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!"; - public static final String BIP_INFERENCE_PROVENANCE = "bip_affiliation"; + public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref"; public static void main(String[] args) throws Exception { diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties new file mode 100644 index 000000000..dce59a31f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties @@ -0,0 +1,42 @@ +# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties --- +# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos +# dhp.hadoop.frontend.user.name=ilias.kanellos +# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl +# dhp.hadoop.frontend.port.ssh=22 +# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie +# jobTracker=yarnRM +# nameNode=hdfs://nameservice1 +# oozie.execution.log.file.location = 
target/extract-and-run-on-remote-host.log +# maven.executable=mvn + +# Some memory and driver settings for more demanding tasks +sparkDriverMemory=10G +sparkExecutorMemory=10G +sparkExecutorCores=4 +sparkShufflePartitions=7680 + +# The above is given differently in an example I found online +oozie.action.sharelib.for.spark=spark2 +oozieActionShareLibForSpark2=spark2 +spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 +spark2EventLogDir=/user/spark/spark2ApplicationHistory +sparkSqlWarehouseDir=/user/hive/warehouse +hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 +# This MAY avoid the "no library used" error +oozie.use.system.libpath=true +# Settings copied from OpenAIRE's jobs +spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener +spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener + +# I think this should be the Oozie workflow directory +# oozieWorkflowPath=/user/ilias.kanellos/workflow_example/ + + +# The workflow application path +wfAppPath=${oozieTopWfApplicationPath} + +# The following is needed as a property of a workflow +oozie.wf.application.path=${oozieTopWfApplicationPath} + +inputPath=/user/schatz/affiliations/data-v3.json +outputPath=/tmp/crossref-affiliations-output-v3 diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml index 7c44bb7eb..31f35adfd 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml @@ -79,7 +79,6 @@ - yarn diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java 
b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java index c76fcf6a9..7e2fc5a39 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java @@ -94,11 +94,11 @@ public class PrepareAffiliationRelationsTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); - for (Relation r : tmp.collect()) { - System.out.println( - r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred() - ); - } +// for (Relation r : tmp.collect()) { +// System.out.println( +// r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred() +// ); +// } // count the number of relations assertEquals(16, tmp.count());