Minor changes: update BIP inference provenance value, add job.properties, comment out test debug output

2023-07-17 11:17:53 +03:00 · 2023-07-17 11:17:53 +03:00 · bc1a4611aa
parent 4eba14a80e
commit bc1a4611aa
4 changed files with 48 additions and 7 deletions
--- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
+++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java
@ -46,7 +46,7 @@ public class PrepareAffiliationRelations implements Serializable {
    private static final String ID_PREFIX = "50|doi_________::";
    public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
    public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
-    public static final String BIP_INFERENCE_PROVENANCE = "bip_affiliation";
+    public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref";

    public static <I extends Result> void main(String[] args) throws Exception {

--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/job.properties
@ -0,0 +1,42 @@
+# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
+# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
+# dhp.hadoop.frontend.user.name=ilias.kanellos
+# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
+# dhp.hadoop.frontend.port.ssh=22
+# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
+# jobTracker=yarnRM
+# nameNode=hdfs://nameservice1
+# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
+# maven.executable=mvn
+
+# Some memory and driver settings for more demanding tasks
+sparkDriverMemory=10G
+sparkExecutorMemory=10G
+sparkExecutorCores=4
+sparkShufflePartitions=7680
+
+# The above is given differently in an example I found online
+oozie.action.sharelib.for.spark=spark2
+oozieActionShareLibForSpark2=spark2
+spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
+spark2EventLogDir=/user/spark/spark2ApplicationHistory
+sparkSqlWarehouseDir=/user/hive/warehouse
+hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
+# Using the system libpath may avoid the "no library used" error
+oozie.use.system.libpath=true
+# Spark lineage listener settings copied from OpenAIRE's jobs
+spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
+spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
+
+# Presumably the Oozie workflow directory (unconfirmed)
+# oozieWorkflowPath=/user/ilias.kanellos/workflow_example/
+
+
+# The workflow application path
+wfAppPath=${oozieTopWfApplicationPath}
+
+# The following is needed as a property of a workflow
+oozie.wf.application.path=${oozieTopWfApplicationPath}
+
+inputPath=/user/schatz/affiliations/data-v3.json
+outputPath=/tmp/crossref-affiliations-output-v3
--- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
+++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/oozie_app/workflow.xml
@ -79,7 +79,6 @@
        <error to="Kill"/>
    </action>

-
    <action name="atomicactions">
        <spark xmlns="uri:oozie:spark-action:0.2">
            <master>yarn</master>
--- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
+++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelationsTest.java
@ -94,11 +94,11 @@ public class PrepareAffiliationRelationsTest {
            .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
            .map(aa -> ((Relation) aa.getPayload()));

-        for (Relation r : tmp.collect()) {
-            System.out.println(
-                    r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred()
-            );
-        }
+//        for (Relation r : tmp.collect()) {
+//            System.out.println(
+//                    r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred()
+//            );
+//        }
        // count the number of relations
        assertEquals(16, tmp.count());