forked from D-Net/dnet-hadoop
Minor changes
This commit is contained in:
parent
4eba14a80e
commit
bc1a4611aa
|
@ -46,7 +46,7 @@ public class PrepareAffiliationRelations implements Serializable {
|
|||
private static final String ID_PREFIX = "50|doi_________::";
|
||||
public static final String BIP_AFFILIATIONS_CLASSID = "result:organization:bipinference";
|
||||
public static final String BIP_AFFILIATIONS_CLASSNAME = "Affiliation relation inferred by BIP!";
|
||||
public static final String BIP_INFERENCE_PROVENANCE = "bip_affiliation";
|
||||
public static final String BIP_INFERENCE_PROVENANCE = "bip:affiliation:crossref";
|
||||
|
||||
public static <I extends Result> void main(String[] args) throws Exception {
|
||||
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
# --- You can override the following properties (if needed) coming from your ~/.dhp/application.properties ---
|
||||
# dhp.hadoop.frontend.temp.dir=/home/ilias.kanellos
|
||||
# dhp.hadoop.frontend.user.name=ilias.kanellos
|
||||
# dhp.hadoop.frontend.host.name=iis-cdh5-test-gw.ocean.icm.edu.pl
|
||||
# dhp.hadoop.frontend.port.ssh=22
|
||||
# oozieServiceLoc=http://iis-cdh5-test-m3:11000/oozie
|
||||
# jobTracker=yarnRM
|
||||
# nameNode=hdfs://nameservice1
|
||||
# oozie.execution.log.file.location = target/extract-and-run-on-remote-host.log
|
||||
# maven.executable=mvn
|
||||
|
||||
# Some memory and driver settings for more demanding tasks
|
||||
sparkDriverMemory=10G
|
||||
sparkExecutorMemory=10G
|
||||
sparkExecutorCores=4
|
||||
sparkShufflePartitions=7680
|
||||
|
||||
# The above is given differently in an example I found online
|
||||
oozie.action.sharelib.for.spark=spark2
|
||||
oozieActionShareLibForSpark2=spark2
|
||||
spark2YarnHistoryServerAddress=http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089
|
||||
spark2EventLogDir=/user/spark/spark2ApplicationHistory
|
||||
sparkSqlWarehouseDir=/user/hive/warehouse
|
||||
hiveMetastoreUris=thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083
|
||||
# This MAY avoid the no library used error
|
||||
oozie.use.system.libpath=true
|
||||
# Some stuff copied from openaire's jobs
|
||||
spark2ExtraListeners=com.cloudera.spark.lineage.NavigatorAppListener
|
||||
spark2SqlQueryExecutionListeners=com.cloudera.spark.lineage.NavigatorQueryListener
|
||||
|
||||
# I think this should be the oozie workflow directory
|
||||
# oozieWorkflowPath=/user/ilias.kanellos/workflow_example/
|
||||
|
||||
|
||||
# The workflow application path
|
||||
wfAppPath=${oozieTopWfApplicationPath}
|
||||
|
||||
# The following is needed as a property of a workflow
|
||||
oozie.wf.application.path=${oozieTopWfApplicationPath}
|
||||
|
||||
inputPath=/user/schatz/affiliations/data-v3.json
|
||||
outputPath=/tmp/crossref-affiliations-output-v3
|
|
@ -79,7 +79,6 @@
|
|||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
|
||||
<action name="atomicactions">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
|
|
|
@ -94,11 +94,11 @@ public class PrepareAffiliationRelationsTest {
|
|||
.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
|
||||
.map(aa -> ((Relation) aa.getPayload()));
|
||||
|
||||
for (Relation r : tmp.collect()) {
|
||||
System.out.println(
|
||||
r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred()
|
||||
);
|
||||
}
|
||||
// for (Relation r : tmp.collect()) {
|
||||
// System.out.println(
|
||||
// r.getSource() + "\t" + r.getTarget() + "\t" + r.getRelType() + "\t" + r.getRelClass() + "\t" + r.getSubRelType() + "\t" + r.getValidationDate() + "\t" + r.getDataInfo().getTrust() + "\t" + r.getDataInfo().getInferred()
|
||||
// );
|
||||
// }
|
||||
// count the number of relations
|
||||
assertEquals(16, tmp.count());
|
||||
|
||||
|
|
Loading…
Reference in New Issue