[Dump Subset] fixing issue and finalizing workflow

This commit is contained in:
Miriam Baglioni 2022-11-21 14:10:46 +01:00
parent 8878b96204
commit d3da9ab2c6
14 changed files with 318 additions and 160 deletions

View File

@ -37,7 +37,6 @@ import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.CardinalityTooHighException;
import eu.dnetlib.dhp.oa.graph.dump.exceptions.NoAvailableEntityTypeException;
import eu.dnetlib.dhp.oa.model.Container;
import eu.dnetlib.dhp.oa.model.Result;
import eu.dnetlib.dhp.oa.model.graph.*;
@ -549,13 +548,12 @@ public class SparkDumpEntitiesJob implements Serializable {
private static <E extends OafEntity> void organizationMap(SparkSession spark, String inputPath, String outputPath,
Class<E> inputClazz) {
Dataset<Organization> tmp = Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
Encoders.bean(Organization.class));
tmp.foreach((ForeachFunction<Organization>) o -> System.out.println(new ObjectMapper().writeValueAsString(o)));
tmp.filter(Objects::nonNull)
Utils
.readPath(spark, inputPath, inputClazz)
.map(
(MapFunction<E, Organization>) o -> mapOrganization((eu.dnetlib.dhp.schema.oaf.Organization) o),
Encoders.bean(Organization.class))
.filter((FilterFunction<Organization>) o -> o != null)
.write()
.mode(SaveMode.Overwrite)
.option(COMPRESSION, GZIP)
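
Note on the fix above: the leftover debug foreach (which printed every mapped Organization through ObjectMapper and forced an extra pass over the data) is removed, and the null check is folded into the same chain as a typed FilterFunction instead of a separate filter on an intermediate Dataset. A minimal sketch of the resulting shape, assuming a hypothetical bean and a parse step that may yield nulls:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class NullSafeDumpSketch {
    // Hypothetical bean, standing in for the dump model class.
    public static class Org implements java.io.Serializable {
        private String id;
        public String getId() { return id; }
        public void setId(String id) { this.id = id; }
    }

    public static void dump(SparkSession spark, String inputPath, String outputPath) {
        spark
            .read()
            .textFile(inputPath)
            .map((MapFunction<String, Org>) NullSafeDumpSketch::parseOrNull, Encoders.bean(Org.class))
            // the null filter stays inside the chain: no intermediate Dataset, no debug foreach
            .filter((FilterFunction<Org>) o -> o != null)
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    private static Org parseOrNull(String json) {
        try {
            return new ObjectMapper().readValue(json, Org.class);
        } catch (Exception e) {
            return null; // malformed records are dropped by the filter in dump()
        }
    }
}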

View File

@ -168,7 +168,7 @@ public class SparkDumpResult implements Serializable {
private static <I extends OafEntity> I filterResult(I value, Map<String, String> pathMap,
SelectionConstraints selectionConstraints, Class<I> inputClazz) {
Optional<DataInfo> odInfo = Optional.ofNullable(value.getDataInfo());
log.info("Filtering result");
if (Boolean.FALSE.equals(odInfo.isPresent())) {
return null;
}
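
For context: filterResult returns null when a record carries no DataInfo, and the calling pipeline later drops those nulls. A compact sketch of the guard, with Entity as a hypothetical stand-in rather than one of the oaf classes:

import java.util.Optional;

final class ProvenanceGuardSketch {
    // Hypothetical stand-in exposing getDataInfo().
    interface Entity { Object getDataInfo(); }

    // Mirrors filterResult above: a missing DataInfo means the record is
    // dropped (the caller filters out nulls downstream).
    static <E extends Entity> E keepIfProvenanced(E value) {
        return Optional.ofNullable(value.getDataInfo()).isPresent() ? value : null;
    }
}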

View File

@ -148,9 +148,6 @@ public class SparkSelectSubset implements Serializable {
.joinWith(resultIds, relation.col("target").equalTo(resultIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource
.foreach((ForeachFunction<Relation>) r -> System.out.println(new ObjectMapper().writeValueAsString(r)));
relJoinSource
.joinWith(otherIds, relJoinSource.col("source").equalTo(otherIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
@ -218,11 +215,18 @@ public class SparkSelectSubset implements Serializable {
(FlatMapFunction<OtherResearchProduct, Instance>) p -> p.getInstance().iterator(),
Encoders.bean(Instance.class)))
.flatMap(
(FlatMapFunction<Instance, String>) i -> Arrays
.asList(i.getCollectedfrom().getKey(), i.getHostedby().getKey())
.iterator(),
(FlatMapFunction<Instance, String>) i -> {
ArrayList<String> ret = new ArrayList<>();
if (Optional.ofNullable(i.getCollectedfrom()).isPresent()
&& Optional.ofNullable(i.getCollectedfrom().getKey()).isPresent())
ret.add(i.getCollectedfrom().getKey());
if (Optional.ofNullable(i.getHostedby()).isPresent()
&& Optional.ofNullable(i.getHostedby().getKey()).isPresent())
ret.add(i.getHostedby().getKey());
return ret.iterator();
},
Encoders.STRING())
.filter((FilterFunction<String>) s -> !s.equals(ModelConstants.UNKNOWN))
.filter((FilterFunction<String>) s -> !s.equals(ModelConstants.UNKNOWN_REPOSITORY.getKey()))
.distinct();
datasource
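
The new flatMap body guards collectedfrom and hostedby before collecting their keys, where the old one dereferenced both unconditionally. The guard can be read as a small helper; a sketch, with KV as a hypothetical key/value holder mirroring the getKey() accessor used above:

import java.util.ArrayList;
import java.util.List;

final class NullSafeKeysSketch {
    // Hypothetical holder mirroring the getKey() accessor above.
    interface KV { String getKey(); }

    // Adds the key only when both the holder and its key are non-null,
    // exactly as in the guarded flatMap body.
    static void addKeyIfPresent(List<String> acc, KV kv) {
        if (kv != null && kv.getKey() != null) {
            acc.add(kv.getKey());
        }
    }

    static List<String> keysOf(KV collectedfrom, KV hostedby) {
        List<String> ret = new ArrayList<>();
        addKeyIfPresent(ret, collectedfrom);
        addKeyIfPresent(ret, hostedby);
        return ret;
    }
}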

View File

@ -10,7 +10,6 @@ import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
@ -24,12 +23,12 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.*;
import scala.Tuple2;
/**
@ -86,7 +85,8 @@ public class SparkSelectValidContext implements Serializable {
Dataset<String> context = getFilter(spark, inputPath + "/publication", keys, Publication.class)
.union(getFilter(spark, inputPath + "/dataset", keys, eu.dnetlib.dhp.schema.oaf.Dataset.class))
.union(getFilter(spark, inputPath + "/software", keys, Software.class))
.union(getFilter(spark, inputPath + "/otherresearchproduct", keys, OtherResearchProduct.class));
.union(getFilter(spark, inputPath + "/otherresearchproduct", keys, OtherResearchProduct.class))
.distinct();
Dataset<ResearchCommunity> researchCommunity = Utils.readPath(spark, contextPath, ResearchCommunity.class);
@ -108,7 +108,7 @@ public class SparkSelectValidContext implements Serializable {
return Utils
.readPath(spark, inputPath, inputClazz)
.filter((FilterFunction<I>) r -> isPresentContext(r))
.flatMap(
.flatMap(
(FlatMapFunction<I, String>) r -> r
.getContext()
.stream()
@ -116,8 +116,7 @@ public class SparkSelectValidContext implements Serializable {
.collect(Collectors.toList())
.iterator(),
Encoders.STRING())
.filter((FilterFunction<String>) c -> extracted(c, keys)).distinct();
.filter((FilterFunction<String>) c -> extracted(c, keys));
}
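
Note the two changes in this file: .distinct() moves from the per-type getFilter onto the unioned dataset, so context identifiers are deduplicated once across publications, datasets, software and other research products rather than only within each type. In sketch form, for two Dataset<String> branches:

import org.apache.spark.sql.Dataset;

final class DistinctPlacementSketch {
    // before: a.distinct().union(b.distinct()) keeps duplicates that span branches;
    // after: one distinct over the union removes those as well.
    static Dataset<String> dedupAcrossBranches(Dataset<String> a, Dataset<String> b) {
        return a.union(b).distinct();
    }
}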

View File

@ -5,13 +5,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
import java.io.Serializable;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
@ -20,13 +17,13 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.oa.graph.dump.Utils;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.Relation;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.*;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import scala.Tuple2;
/**
@ -41,7 +38,7 @@ public class SparkSelectValidRelationContext implements Serializable {
.toString(
SparkSelectValidRelationContext.class
.getResourceAsStream(
""));
"/eu/dnetlib/dhp/oa/graph/dump/input_select_valid_relation_context_parameters.json"));
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
parser.parseArgument(args);
@ -56,9 +53,6 @@ public class SparkSelectValidRelationContext implements Serializable {
final String inputPath = parser.get("sourcePath");
log.info("inputPath: {}", inputPath);
final String contextPath = parser.get("contextPath");
log.info("contextPath: {}", contextPath);
final String contextRelationPath = parser.get("contextRelationPath");
log.info("contextRelationPath: {}", contextRelationPath);
@ -68,7 +62,7 @@ public class SparkSelectValidRelationContext implements Serializable {
conf,
isSparkSessionManaged,
spark -> {
selectValidRelation(spark, inputPath, contextRelationPath);
selectValidRelation(spark, inputPath, contextRelationPath);
});
@ -110,33 +104,45 @@ public class SparkSelectValidRelationContext implements Serializable {
(MapFunction<eu.dnetlib.dhp.oa.model.graph.Datasource, String>) o -> o.getId(),
Encoders.STRING()));
Dataset<Relation> relation = Utils
Dataset<Tuple2<String, Relation>> relationSource = Utils
.readPath(spark, contextRelationPath + "/context", Relation.class)
.union(Utils.readPath(spark, contextRelationPath + "/contextOrg", Relation.class));
.union(Utils.readPath(spark, contextRelationPath + "/contextOrg", Relation.class))
.map(
(MapFunction<Relation, Tuple2<String, Relation>>) r -> new Tuple2<>(r.getSource().getId(), r),
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
Dataset<CommunityResult> allowedContext = Utils.readPath(spark, inputPath + "/community_infrastructure", CommunityResult.class);
Dataset<ResearchCommunity> allowedContext = Utils
.readPath(spark, inputPath + "/community_infrastructure", ResearchCommunity.class);
Dataset<Relation> relJoinSource = relation
.joinWith(dumpedIds, relation.col("source").equalTo(dumpedIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
Dataset<Tuple2<String, Relation>> relJoinSource = relationSource
.joinWith(dumpedIds, relationSource.col("_1").equalTo(dumpedIds.col("value")))
.map(
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
t2._1()._2().getTarget().getId(), t2._1()._2()),
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
relJoinSource
.joinWith(allowedContext, relJoinSource.col("target").equalTo(allowedContext.col("id")))
.joinWith(allowedContext, relJoinSource.col("_1").equalTo(allowedContext.col("id")))
.map(
(MapFunction<Tuple2<Relation, CommunityResult>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
(MapFunction<Tuple2<Tuple2<String, Relation>, ResearchCommunity>, Relation>) t2 -> t2._1()._2(),
Encoders.bean(eu.dnetlib.dhp.oa.model.graph.Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
.json(inputPath + "/relation");
relJoinSource = relation
.joinWith(dumpedIds, relation.col("target").equalTo(dumpedIds.col("value")))
.map((MapFunction<Tuple2<Relation, String>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class));
relJoinSource = relationSource
.joinWith(allowedContext, relationSource.col("_1").equalTo(allowedContext.col("id")))
.map(
(MapFunction<Tuple2<Tuple2<String, Relation>, ResearchCommunity>, Tuple2<String, Relation>>) t2 -> new Tuple2<>(
t2._1()._2().getTarget().getId(), t2._1()._2()),
Encoders.tuple(Encoders.STRING(), Encoders.bean(Relation.class)));
relJoinSource
.joinWith(allowedContext, relJoinSource.col("source").equalTo(allowedContext.col("id")))
.joinWith(dumpedIds, relJoinSource.col("_1").equalTo(dumpedIds.col("value")))
.map(
(MapFunction<Tuple2<Relation, CommunityResult>, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class))
(MapFunction<Tuple2<Tuple2<String, Relation>, String>, Relation>) t2 -> t2._1()._2(),
Encoders.bean(Relation.class))
.write()
.mode(SaveMode.Append)
.option("compression", "gzip")
@ -144,5 +150,4 @@ public class SparkSelectValidRelationContext implements Serializable {
}
}
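
The reworked joins key every Relation by one endpoint id in a Tuple2, so the join condition can reference the flat "_1" column instead of a nested bean path, and each relation is validated on both ends: source against the dumped entity ids, target against the allowed research communities (and symmetrically for the reverse direction). A minimal sketch of one keyed-join step, with Rel as a hypothetical bean:

import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import scala.Tuple2;

final class KeyedJoinSketch {
    // Hypothetical bean standing in for the dump Relation model.
    public static class Rel implements java.io.Serializable {
        private String src;
        public String getSrc() { return src; }
        public void setSrc(String src) { this.src = src; }
    }

    static Dataset<Rel> keepValidSources(Dataset<Rel> rels, Dataset<String> validIds) {
        // key by source id: the join condition becomes a flat column ("_1")
        Dataset<Tuple2<String, Rel>> keyed = rels
            .map(
                (MapFunction<Rel, Tuple2<String, Rel>>) r -> new Tuple2<>(r.getSrc(), r),
                Encoders.tuple(Encoders.STRING(), Encoders.bean(Rel.class)));
        // a Dataset<String> exposes its single column as "value"
        return keyed
            .joinWith(validIds, keyed.col("_1").equalTo(validIds.col("value")))
            .map(
                (MapFunction<Tuple2<Tuple2<String, Rel>, String>, Rel>) t -> t._1()._2(),
                Encoders.bean(Rel.class));
    }
}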

View File

@ -0,0 +1,27 @@
[
{
"paramName":"s",
"paramLongName":"sourcePath",
"paramDescription": "the path of the sequencial file to read",
"paramRequired": true
},
{
"paramName": "ssm",
"paramLongName": "isSparkSessionManaged",
"paramDescription": "true if the spark session is managed, false otherwise",
"paramRequired": false
},
{
"paramName":"crp",
"paramLongName":"contextRelationPath",
"paramDescription": "the map to find fields in the json",
"paramRequired": false
}
]
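
This new parameter file is consumed through ArgumentApplicationParser in SparkSelectValidRelationContext above. A sketch of the round trip, mirroring the invocation style used by the jobs and tests in this commit; the path value is a placeholder:

import eu.dnetlib.dhp.application.ArgumentApplicationParser;

public class ParameterParsingSketch {
    public static void main(String[] args) throws Exception {
        // Inline copy of the contextRelationPath entry from the file above.
        String jsonConfiguration = "["
            + "{\"paramName\":\"crp\",\"paramLongName\":\"contextRelationPath\","
            + "\"paramDescription\":\"the path where the context relations are stored\","
            + "\"paramRequired\":false}"
            + "]";
        ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration);
        parser.parseArgument(new String[] {"-contextRelationPath", "/tmp/working/relation"}); // placeholder path
        System.out.println(parser.get("contextRelationPath"));
    }
}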

View File

@ -84,8 +84,6 @@
<start to="fork_select_and_dump" />
<fork name="fork_select_and_dump">
<path start="select_and_dump_publication"/>
<path start="select_and_dump_dataset"/>
@ -232,7 +230,7 @@
<arg>--outputPath</arg><arg>${outputPath}</arg>
<arg>--removeSet</arg><arg>${removeSet}</arg>
</spark>
<ok to="End"/>
<ok to="fork_dump_otherentities"/>
<error to="Kill"/>
</action>
@ -358,7 +356,7 @@
<arg>--sourcePath</arg><arg>${outputPath}/original</arg>
<arg>--contextPath</arg><arg>${workingDir}/context/community_infrastructure.json.gz</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/communities_infrastructures</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/community_infrastructure</arg>
</spark>
<ok to="join_context"/>
<error to="Kill"/>
@ -420,8 +418,8 @@
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${outputPath}/dump</arg>
<arg>--relationPath</arg><arg>${workingDir}/relation</arg> <!-- new relations from context -->
<arg>--contextPath</arg><arg>${outputPath}/dump/communities_infrastructures</arg>
<arg>--contextRelationPath</arg><arg>${workingDir}/relation</arg> <!-- new relations from context -->
</spark>
<ok to="dump_relation"/>
<error to="Kill"/>
@ -496,7 +494,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/result/dataset</arg>
<arg>--sourcePath</arg><arg>${outputPath}/original/dataset</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
@ -522,7 +520,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/result/otherresearchproduct</arg>
<arg>--sourcePath</arg><arg>${outputPath}/original/otherresearchproduct</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
@ -548,7 +546,7 @@
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir}
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/result/software</arg>
<arg>--sourcePath</arg><arg>${outputPath}/original/software</arg>
<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
<arg>--outputPath</arg><arg>${outputPath}/dump/relation</arg>
<arg>--communityMapPath</arg><arg>${communityMapPath}</arg>
@ -558,8 +556,6 @@
</action>
<kill name="Kill">
<message>Sub-workflow dump complete failed with error message ${wf:errorMessage()}
</message>

View File

@ -5,12 +5,12 @@ import java.util.*;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import eu.dnetlib.dhp.oa.model.graph.Relation;
@ -635,8 +635,6 @@ class CreateRelationTest {
tmp.contains("10|opendoar____::97275a23ca44226c9964043c8462be96") &&
tmp.contains("10|doajarticles::2899208a99aa7d142646e0a80bfeef05"));
}
@Test

View File

@ -5,12 +5,11 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import org.apache.commons.io.FileUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.api.java.function.ForeachFunction;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;
@ -25,8 +24,11 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.oa.graph.dump.DumpJobTest;
import eu.dnetlib.dhp.oa.graph.dump.community.CommunityMap;
import eu.dnetlib.dhp.oa.graph.dump.complete.Extractor;
import eu.dnetlib.dhp.oa.graph.dump.complete.SparkDumpEntitiesJob;
import eu.dnetlib.dhp.oa.model.community.CommunityResult;
import eu.dnetlib.dhp.oa.model.graph.GraphResult;
import eu.dnetlib.dhp.oa.model.graph.ResearchCommunity;
import eu.dnetlib.dhp.schema.oaf.*;
/**
@ -181,29 +183,29 @@ public class DumpSubsetTest {
.saveAsTextFile(workingDir.toString() + "/original/otherresearchproduct");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/publication")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/publication");
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/publication")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/publication");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/software")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/software");
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/software")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/software");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/dataset")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/dataset");
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/dataset")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/dataset");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/otherresearchproduct")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/otherresearchproduct");
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/otherresearchproduct")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/otherresearchproduct");
SparkSelectSubset
.main(
@ -268,42 +270,6 @@ public class DumpSubsetTest {
}
@Test
public void dumpEntitiesTest() throws Exception {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/organization")
.getPath();
SparkDumpEntitiesJob
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", sourcePath,
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Organization",
"-outputPath", workingDir.toString() + "/dump"
});
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.oa.model.graph.Organization> tmp = sc
.textFile(workingDir.toString() + "/dump")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Organization.class));
Assertions.assertEquals(3, tmp.count());
tmp
.foreach(
o -> System.out
.println(OBJECT_MAPPER.writeValueAsString(o)));
}
@Test
public void selectValidContextTest() throws Exception {
@ -344,54 +310,125 @@ public class DumpSubsetTest {
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("beopen")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("neanias-underwater")).count());
}
@Test
public void selectValidRelationContextTest() throws Exception {
// final String contextPath = parser.get("contextPath");
// log.info("contextPath: {}", contextPath);
//
// final String contextRelationPath = parser.get("contextRelationPath");
// log.info("contextRelationPath: {}", contextRelationPath);
// final String sourcePath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/")
// .getPath();
//
// final String communityMapPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/communityMap")
// .getPath();
//
// final String contextPath = getClass()
// .getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/context/community_infrastructure")
// .getPath();
//
final String contextRelationPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/working/relation")
.getPath();
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/publication")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/publication");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/software")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/software");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/dataset")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/dataset");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/otherresearchproduct")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/otherresearchproduct");
// SparkSelectValidRelationContext
// .main(
// new String[] {
// "-isSparkSessionManaged", Boolean.FALSE.toString(),
// "-sourcePath", sourcePath,
// "-outputPath", workingDir.toString() + "/dump/community_infrastructure",
// "-communityMapPath", communityMapPath,
// "-contextPath", contextPath
//
// });
//
JavaRDD<ResearchCommunity> tmp = sc
.textFile(workingDir.toString() + "/dump/community_infrastructure")
.map(item -> OBJECT_MAPPER.readValue(item, ResearchCommunity.class));
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/organization")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/organization");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/datasource")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/datasource");
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/project")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/project");
Assertions.assertEquals(5, tmp.count());
sc
.textFile(
getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/dump/community_infrastructure")
.getPath())
.saveAsTextFile(workingDir.toString() + "/dump/community_infrastructure");
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("enermaps")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("eutopia")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("dh-ch")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("beopen")).count());
Assertions.assertEquals(1, tmp.filter(cr -> cr.getAcronym().equals("neanias-underwater")).count());
SparkSelectValidRelationContext
.main(
new String[] {
"-isSparkSessionManaged", Boolean.FALSE.toString(),
"-sourcePath", workingDir.toString() + "/dump",
"-contextRelationPath", contextRelationPath
});
JavaRDD<eu.dnetlib.dhp.oa.model.graph.Relation> tmp = sc
.textFile(workingDir.toString() + "/dump/relation")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class));
Assertions.assertEquals(10, tmp.count());
Assertions.assertEquals(5, tmp.filter(r -> r.getSource().getId().startsWith("00")).count());
Assertions.assertEquals(5, tmp.filter(r -> r.getTarget().getId().startsWith("00")).count());
Assertions.assertEquals(2, tmp.filter(r -> r.getSource().getId().startsWith("10")).count());
Assertions.assertEquals(2, tmp.filter(r -> r.getTarget().getId().startsWith("10")).count());
Assertions.assertEquals(1, tmp.filter(r -> r.getSource().getId().startsWith("40")).count());
Assertions.assertEquals(1, tmp.filter(r -> r.getTarget().getId().startsWith("40")).count());
Assertions.assertEquals(2, tmp.filter(r -> r.getSource().getId().startsWith("20")).count());
Assertions.assertEquals(2, tmp.filter(r -> r.getTarget().getId().startsWith("20")).count());
}
@Test
public void extractRelationFromResultTest() {
final String sourcePath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/original/publication")
.getPath();
final String communityMapPath = getClass()
.getResource("/eu/dnetlib/dhp/oa/graph/dump/subset/communityMap")
.getPath();
Extractor ex = new Extractor();
ex
.run(
false, sourcePath, workingDir.toString() + "/relation",
// eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath);
eu.dnetlib.dhp.schema.oaf.Publication.class, communityMapPath);
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
JavaRDD<eu.dnetlib.dhp.oa.model.graph.Relation> tmp = sc
.textFile(workingDir.toString() + "/relation")
.map(item -> OBJECT_MAPPER.readValue(item, eu.dnetlib.dhp.oa.model.graph.Relation.class));
Assertions.assertEquals(80, tmp.count());
Assertions.assertEquals(40, tmp.filter(r -> r.getSource().getId().startsWith("50|")).count());
Assertions.assertEquals(30, tmp.filter(r -> r.getSource().getId().startsWith("10|")).count());
Assertions.assertEquals(10, tmp.filter(r -> r.getSource().getId().startsWith("00|")).count());
}
}

View File

@ -0,0 +1,4 @@
{"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","originalId":["doajarticles::1798-355X"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"not available","officialname":"Pelitutkimuksen vuosikirja","englishname":"Pelitutkimuksen vuosikirja","websiteurl":"http://www.pelitutkimus.fi","logourl":null,"dateofvalidation":null,"description":null,"subjects":["Geography. Anthropology. Recreation: Recreation. Leisure | Science: Mathematics: Instruments and machines: Electronic computers. Computer science: Computer software"],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}
{"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","originalId":["doajarticles::1879-9337"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"collected from a compatible aggregator","officialname":"Review of Development Finance","englishname":"Review of Development Finance","websiteurl":"http://www.journals.elsevier.com/review-of-development-finance/","logourl":null,"dateofvalidation":null,"description":null,"subjects":["Social Sciences: Industries. Land use. Labor: Economic growth, development, planning | Social Sciences: Finance"],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}
{"id":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","originalId":["doajarticles::1048-9533"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"collected from a compatible aggregator","officialname":"Journal of Applied Mathematics and Stochastic Analysis","englishname":"Journal of Applied Mathematics and Stochastic Analysis","websiteurl":"https://www.hindawi.com/journals/jamsa","logourl":null,"dateofvalidation":null,"description":null,"subjects":[],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}
{"id":"10|doajarticles::a5314b60f79b869cb5d3a2709167bc3a","originalId":["doajarticles::0322-788X"],"pid":[],"datasourcetype":{"scheme":"pubsrepository::journal","value":"Journal"},"openairecompatibility":"collected from a compatible aggregator","officialname":"Statistika: Statistics and Economy Journal","englishname":"Statistika: Statistics and Economy Journal","websiteurl":"http://www.czso.cz/statistika_journal","logourl":null,"dateofvalidation":null,"description":null,"subjects":["Social Sciences: Statistics"],"languages":[],"contenttypes":["Journal articles"],"releasestartdate":null,"releaseenddate":null,"missionstatementurl":null,"accessrights":null,"uploadrights":null,"databaseaccessrestriction":null,"datauploadrestriction":null,"versioning":false,"citationguidelineurl":null,"pidsystems":null,"certificates":null,"policies":[],"journal":null}

View File

@ -0,0 +1,3 @@
{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","websiteurl":null,"code":"135027","acronym":null,"title":"Dynamic 3D resolution-enhanced low-coherence interferometric imaging / Consortium: Hi-Lo","startdate":null,"enddate":null,"callidentifier":"Fotoniikka ja modernit kuvantamismenetelmät LT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|aka_________::9d1af21dbd0f5bc719f71553d19a6b3a","websiteurl":null,"code":"316061","acronym":null,"title":"Finnish Imaging of Degenerative Shoulder Study (FIMAGE): A study on the prevalence of degenerative imaging changes of the shoulder and their relevance to clinical symptoms in the general population.","startdate":null,"enddate":null,"callidentifier":"Academy Project Funding TT","keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"AKA","name":"Academy of Finland","jurisdiction":"FI","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}
{"id":"40|anr_________::1f21edc5c902be305ee47148955c6e50","websiteurl":null,"code":"ANR-17-CE05-0033","acronym":"MOISE","title":"METAL OXIDES AS LOW LOADED NANO-IRIDIUM SUPPORT FOR COMPETITIVE WATER ELECTROLYSIS","startdate":null,"enddate":null,"callidentifier":null,"keywords":null,"openaccessmandateforpublications":false,"openaccessmandatefordataset":false,"subject":[],"funding":[{"shortName":"ANR","name":"French National Research Agency (ANR)","jurisdiction":"FR","funding_stream":null}],"summary":null,"granted":null,"h2020programme":[]}

View File

@ -0,0 +1,25 @@
{"source":{"id":"00|context_____::99c8ef576f385bc322564d5694df6fc2","type":"context"},"target":{"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|doajarticles::9c4b678901e5276d9e3addee566816af","type":"datasource"},"target":{"id":"00|context_____::99c8ef576f385bc322564d5694df6fc2","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::e15922110564cf669aaed346e871bc01","type":"context"},"target":{"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|doajarticles::acb7c79bb85d3b3a7b75389f5d9570f5","type":"datasource"},"target":{"id":"00|context_____::e15922110564cf669aaed346e871bc01","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","type":"context"},"target":{"id":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|doajarticles::1fa6859d71faa77b32d82f278c6ed1df","type":"datasource"},"target":{"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","type":"context"},"target":{"id":"10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|doajarticles::6eb31d13b12bc06bbac06aef63cf33c9","type":"datasource"},"target":{"id":"00|context_____::aa0e56dd2e9d2a0be749f5debdd2b3d8","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"target":{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c4","type":"datasource"},"target":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"target":{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c2","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"40|aka_________::01bb7b48e29d732a1c7bc5150b9195c2","type":"datasource"},"target":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"target":{"id":"10|openaire____::c5502a43e76feab55dd00cf50f519125","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|openaire____::c5502a43e76feab55dd00cf50f519125","type":"datasource"},"target":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"target":{"id":"10|re3data_____::a48f09c562b247a9919acfe195549b47","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|re3data_____::a48f09c562b247a9919acfe195549b47","type":"datasource"},"target":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"target":{"id":"10|opendoar____::97275a23ca44226c9964043c8462be96","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|opendoar____::97275a23ca44226c9964043c8462be96","type":"datasource"},"target":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"target":{"id":"10|doajarticles::2899208a99aa7d142646e0a80bfeef05","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|doajarticles::2899208a99aa7d142646e0a80bfeef05","type":"datasource"},"target":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::e6c151d449e1db05b1ffb5ad5ec656cf","type":"context"},"target":{"id":"10|re3data_____::5b9bf9171d92df854cf3c520692e9122","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|re3data_____::5b9bf9171d92df854cf3c520692e9122","type":"datasource"},"target":{"id":"00|context_____::e6c151d449e1db05b1ffb5ad5ec656cf","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::e6c151d449e1db05b1ffb5ad5ec656cf","type":"context"},"target":{"id":"10|doajarticles::c7d3de67dc77af72f6747157441252ec","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"10|doajarticles::c7d3de67dc77af72f6747157441252ec","type":"datasource"},"target":{"id":"00|context_____::e6c151d449e1db05b1ffb5ad5ec656cf","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::e6c151d449e1db05b1ffb5ad5ec656cf","type":"context"},"target":{"id":"10|re3data_____::8515794670370f49c1d176c399c714f5","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}

View File

@ -0,0 +1,6 @@
{"source":{"id":"20|grid________::87698402476531ba39e61f1df38f2a91","type":"datasource"},"target":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::04a00617ca659adc944977ac700ea14b","type":"context"},"target":{"id":"20|grid________::87698402476531ba39e61f1df38f2a91","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","type":"datasource"},"target":{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::5fde864866ea5ded4cc873b3170b63c3","type":"context"},"target":{"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","type":"datasource"},"target":{"id":"00|context_____::e6c151d449e1db05b1ffb5ad5ec656cf","type":"context"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}
{"source":{"id":"00|context_____::e6c151d449e1db05b1ffb5ad5ec656cf","type":"context"},"target":{"id":"20|grid________::94948cc036605bf4a00ec77ce5ca92d3","type":"datasource"},"reltype":{"name":"IsRelatedTo","type":"relationship"},"provenance":{"provenance":"Linked by user","trust":"0.9"},"validated":false,"validationDate":null}

View File

@ -0,0 +1,56 @@
# Root logger option
log4j.rootLogger=DEBUG, stdout
# Direct log messages to stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n
# Change this to set Spark log level
log4j.logger.org.apache.spark=ERROR
log4j.rootCategory=WARN
# Silence akka remoting
log4j.logger.Remoting=WARN
# Ignore messages below warning level from Jetty, because it's a bit verbose
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitterFactory=WARN
log4j.logger.org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter=WARN
#log4j.logger.org.apache.parquet.hadoop.ParquetOutputFormat=WARN
#log4j.logger.org.apache.parquet.hadoop.InternalParquetRecordWriter=WARN
log4j.logger.org.apache.hadoop.io.compress.CodecPool=WARN
#log4j.logger.org.apache.hadoop.io.compress=WARN
#log4j.logger.org.apache.parquet.hadoop.codec.CodecConfig=WARN
log4j.logger.parquet.hadoop.ColumnChunkPageWriteStore=ERROR
log4j.logger.com.jayway.jsonpath.internal.path.CompiledPath=WARN
log4j.logger.org.apache.parquet.hadoop.ParquetRecordReader=ERROR
log4j.logger.parquet.hadoop=WARN
log4j.logger.org.eclipse.jetty.server.handler.ContextHandlerCollection=WARN
log4j.logger.org.spark_project.jetty.util.component.ContainerLifeCycle=WARN
log4j.logger.org.apache.hadoop.mapred.FileInputFormat=WARN
log4j.logger.org.spark_project.jetty.servlet.ServletHandler=WARN
log4j.logger.org.apache.commons.beanutils.converters.BooleanConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.StringConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.LongConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.ArrayConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.FloatConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.IntegerConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.DoubleConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.CharacterConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.ByteConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.BigIntegerConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.BigDecimalConverter=WARN
log4j.logger.org.apache.commons.beanutils.converters.ShortConverter=WARN
log4j.logger.org.apache.commons.beanutils.BeanUtils=WARN
log4j.logger.org.apache.hadoop.metrics2.lib=ERROR