Master branch updates from beta September 2023 #337
|
@ -46,7 +46,7 @@ public class PrepareAffiliationRelations implements Serializable {
|
||||||
private static final String ID_PREFIX = "50|doi_________::";
|
private static final String ID_PREFIX = "50|doi_________::";
|
||||||
public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
|
public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
|
||||||
public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
|
public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
|
||||||
public static final String BIP_INFERENCE_PROVENANCE = "bip_affiliation";
|
public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref";
|
||||||
|
|
||||||
public static <I extends Result> void main(String[] args) throws Exception {
|
public static <I extends Result> void main(String[] args) throws Exception {
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
|
||||||
|
# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
|
||||||
|
# dhp.hadoop.frontend.user.name=ilias.kanellos
|
||||||
|
# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
|
||||||
|
# dhp.hadoop.frontend.port.ssh=22
|
||||||
|
# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
|
||||||
|
# jobTracker=yarnRM
|
||||||
|
# nameNode=hdfs://nameservice1
|
||||||
|
# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
|
||||||
|
# maven.executable=mvn
|
||||||
|
|
||||||
|
# Some memory and driver settings for more demanding tasks
|
||||||
|
sparkDriverMemory=10G
|
||||||
|
sparkExecutorMemory=10G
|
||||||
|
sparkExecutorCores=4
|
||||||
|
sparkShufflePartitions=7680
|
||||||
|
|
||||||
|
# The above is given differently in an example I found online
|
||||||
|
oozie.action.sharelib.for.spark=spark2
|
||||||
|
oozieActionShareLibForSpark2=spark2
|
||||||
|
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
|
||||||
|
spark2EventLogDir=/user/spark/spark2ApplicationHistory
|
||||||
|
sparkSqlWarehouseDir=/user/hive/warehouse
|
||||||
|
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
|
||||||
|
# Enabling the Oozie system libpath (below) MAY avoid the no-library-used error
|
||||||
|
oozie.use.system.libpath=true
|
||||||
|
# Spark listener settings copied from OpenAIRE's jobs
|
||||||
|
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
|
||||||
|
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
|
||||||
|
|
||||||
|
# Presumably the Oozie workflow directory (unconfirmed)
|
||||||
|
# oozieWorkflowPath=/user/ilias.kanellos/workflow_example/
|
||||||
|
|
||||||
|
|
||||||
|
# The workflow application path
|
||||||
|
wfAppPath=${oozieTopWfApplicationPath}
|
||||||
|
|
||||||
|
# The following is needed as a property of a workflow
|
||||||
|
oozie.wf.application.path=${oozieTopWfApplicationPath}
|
||||||
|
|
||||||
|
inputPath=/user/schatz/affiliations/data-v3.json
|
||||||
|
outputPath=/tmp/crossref-affiliations-output-v3
|
|
@ -79,7 +79,6 @@
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
||||||
|
|
||||||
<action name="atomicactions">
|
<action name="atomicactions">
|
||||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
<master>yarn</master>
|
<master>yarn</master>
|
||||||
|
|
|
@ -94,11 +94,11 @@ public class PrepareAffiliationRelationsTest {
|
||||||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||||
.map(aa -> ((Relation) aa.getPayload()));
|
.map(aa -> ((Relation) aa.getPayload()));
|
||||||
|
|
||||||
for (Relation r : tmp.collect()) {
|
// for (Relation r : tmp.collect()) {
|
||||||
System.out.println(
|
// System.out.println(
|
||||||
r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred()
|
// r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred()
|
||||||
);
|
// );
|
||||||
}
|
// }
|
||||||
// count the number of relations
|
// count the number of relations
|
||||||
assertEquals(16, tmp.count());
|
assertEquals(16, tmp.count());
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue