
Merge branch 'beta' of code-repo.d4science.org:D-Net/dnet-hadoop into mvn_site_documentation

commit 18c1d70ef4
Author: Sandro La Bruzzo
Date:   2021-11-16 15:16:49 +01:00
4 changed files with 26 additions and 26 deletions

Changed file 1 of 4: CreateActionSetSparkJob.java

@@ -65,10 +65,10 @@ public class CreateActionSetSparkJob implements Serializable {
 		final String outputPath = parser.get("outputPath");
 		log.info("outputPath {}", outputPath);
 
-		final boolean shouldDuplicateRels =
-			Optional.ofNullable(parser.get("shouldDuplicateRels"))
+		final boolean shouldDuplicateRels = Optional
+			.ofNullable(parser.get("shouldDuplicateRels"))
 			.map(Boolean::valueOf)
 			.orElse(Boolean.FALSE);
 
 		SparkConf conf = new SparkConf();
 		runWithSparkSession(
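The hunk above is a formatting-only change to the chain that parses the optional shouldDuplicateRels flag. A minimal standalone sketch of that pattern, with a plain Map standing in for the project's ArgumentApplicationParser (the Map is an illustrative assumption):

import java.util.Map;
import java.util.Optional;

public class FlagParsingSketch {
    public static void main(String[] args) {
        // Hypothetical stand-in for parser.get("shouldDuplicateRels"), which
        // yields null when the argument was not supplied on the command line.
        Map<String, String> parsed = Map.of("shouldDuplicateRels", "true");

        final boolean shouldDuplicateRels = Optional
            .ofNullable(parsed.get("shouldDuplicateRels"))
            .map(Boolean::valueOf)  // "true"/"false" -> Boolean
            .orElse(Boolean.FALSE); // default when the flag is absent

        System.out.println(shouldDuplicateRels); // prints: true
    }
}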

Changed file 2 of 4: CreateOpenCitationsASTest.java (the two sides of the hunks below render identically, so the change is whitespace-only; each line is shown once)

@@ -84,8 +84,8 @@ public class CreateOpenCitationsASTest {
				new String[] {
					"-isSparkSessionManaged",
					Boolean.FALSE.toString(),
					"-shouldDuplicateRels",
					Boolean.TRUE.toString(),
					"-inputPath",
					inputPath,
					"-outputPath",
@@ -101,7 +101,7 @@ public class CreateOpenCitationsASTest {
		assertEquals(60, tmp.count());
		// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
	}
@@ -109,31 +109,31 @@ public class CreateOpenCitationsASTest {
	void testNumberofRelations2() throws Exception {
		String inputPath = getClass()
			.getResource(
				"/eu/dnetlib/dhp/actionmanager/opencitations/inputFiles")
			.getPath();

		CreateActionSetSparkJob
			.main(
				new String[] {
					"-isSparkSessionManaged",
					Boolean.FALSE.toString(),
					"-inputPath",
					inputPath,
					"-outputPath",
					workingDir.toString() + "/actionSet"
				});

		final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());

		JavaRDD<Relation> tmp = sc
			.sequenceFile(workingDir.toString() + "/actionSet", Text.class, Text.class)
			.map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
			.map(aa -> ((Relation) aa.getPayload()));

		assertEquals(44, tmp.count());
		// tmp.foreach(r -> System.out.println(OBJECT_MAPPER.writeValueAsString(r)));
	}
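The second test reads the action set back as a Hadoop sequence file whose values are JSON-serialized AtomicAction records. A condensed sketch of that read path, assuming a live JavaSparkContext and the project's schema classes on the classpath (the package names below are assumed from the module layout):

import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.oaf.Relation;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

class ActionSetReadSketch {

    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    // Each sequence-file value holds one AtomicAction serialized as JSON;
    // the generic payload is then narrowed to the concrete type, here Relation.
    static JavaRDD<Relation> readRelations(JavaSparkContext sc, String actionSetPath) {
        return sc
            .sequenceFile(actionSetPath, Text.class, Text.class)
            .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class))
            .map(aa -> (Relation) aa.getPayload());
    }
}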

Changed file 3 of 4: Oozie workflow definition (file name not shown in this view)

@@ -89,7 +89,7 @@
                    --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                    --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                    --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                    --conf spark.sql.shuffle.partitions=7680
+                    --conf spark.sql.shuffle.partitions=15000
                </spark-opts>
                <arg>--graphBasePath</arg><arg>${graphBasePath}</arg>
                <arg>--o</arg><arg>${graphOutputPath}</arg>
@@ -114,7 +114,7 @@
                    --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
                    --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
                    --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-                    --conf spark.sql.shuffle.partitions=7680
+                    --conf spark.sql.shuffle.partitions=15000
                </spark-opts>
                <arg>--graphInputPath</arg><arg>${graphBasePath}</arg>
                <arg>--outputPath</arg><arg>${workingPath}/grouped_entities</arg>
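Both hunks make the same change: spark.sql.shuffle.partitions goes from 7680 to 15000, roughly doubling the shuffle parallelism of these two Spark actions. A minimal sketch of setting the same property programmatically rather than through the Oozie spark-opts element (local master used only to keep the sketch self-contained):

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class ShufflePartitionsSketch {
    public static void main(String[] args) {
        // Equivalent of "--conf spark.sql.shuffle.partitions=15000" in the workflow:
        // the number of partitions used when shuffling data for joins and aggregations.
        SparkConf conf = new SparkConf()
            .setAppName("shuffle-partitions-sketch")
            .setMaster("local[*]")
            .set("spark.sql.shuffle.partitions", "15000");

        SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
        System.out.println(spark.conf().get("spark.sql.shuffle.partitions")); // 15000
        spark.stop();
    }
}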

Changed file 4 of 4: SparkResolveEntities.scala

@@ -19,7 +19,7 @@ object SparkResolveEntities {
  def main(args: Array[String]): Unit = {
    val log: Logger = LoggerFactory.getLogger(getClass)
    val conf: SparkConf = new SparkConf()
-    val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_params.json")))
+    val parser = new ArgumentApplicationParser(IOUtils.toString(getClass.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json")))
    parser.parseArgument(args)

    val spark: SparkSession =
      SparkSession
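The only change here points the parser at resolve_entities_params.json instead of resolve_params.json. The rename matters because getResourceAsStream returns null for an unknown resource, so a stale name fails only at runtime inside IOUtils.toString. A defensive Java sketch of the same load (class name and error handling are illustrative, not the project's code):

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.IOUtils;

public class ResourceLoadSketch {
    public static void main(String[] args) throws Exception {
        String path = "/eu/dnetlib/dhp/oa/graph/resolution/resolve_entities_params.json";
        // getResourceAsStream returns null (not an exception) when the path is wrong,
        // which would otherwise surface as a NullPointerException in IOUtils.toString.
        try (InputStream in = ResourceLoadSketch.class.getResourceAsStream(path)) {
            if (in == null) {
                throw new IllegalStateException("resource not found: " + path);
            }
            String json = IOUtils.toString(in, StandardCharsets.UTF_8);
            System.out.println(json.length() + " characters of parameter definitions");
        }
    }
}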