forked from D-Net/dnet-hadoop
ActionManager promote: allow ingesting person records into a graph that did not previously contain them; bump dhp-schemas version
This commit is contained in:
parent
9486e21a44
commit
6bdb8643e6
|
@ -151,12 +151,17 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
SparkSession spark, String path, Class<G> rowClazz) {
|
||||
logger.info("Reading graph table from path: {}", path);
|
||||
|
||||
if (HdfsSupport.exists(path, spark.sparkContext().hadoopConfiguration())) {
|
||||
return spark
|
||||
.read()
|
||||
.textFile(path)
|
||||
.map(
|
||||
(MapFunction<String, G>) value -> OBJECT_MAPPER.readValue(value, rowClazz),
|
||||
Encoders.bean(rowClazz));
|
||||
} else {
|
||||
logger.info("Found empty graph table from path: {}", path);
|
||||
return spark.emptyDataset(Encoders.bean(rowClazz));
|
||||
}
|
||||
}
|
||||
|
||||
private static <A extends Oaf> Dataset<A> readActionPayload(
|
||||
|
@ -223,7 +228,7 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
rowClazz,
|
||||
actionPayloadClazz);
|
||||
|
||||
if (shouldGroupById) {
|
||||
if (Boolean.TRUE.equals(shouldGroupById)) {
|
||||
return PromoteActionPayloadFunctions
|
||||
.groupGraphTableByIdAndMerge(
|
||||
joinedAndMerged, rowIdFn, mergeRowsAndGetFn, zeroFn, isNotZeroFn, rowClazz);
|
||||
|
@ -250,6 +255,8 @@ public class PromoteActionPayloadForGraphTableJob {
|
|||
return () -> clazz.cast(new eu.dnetlib.dhp.schema.oaf.Relation());
|
||||
case "eu.dnetlib.dhp.schema.oaf.Software":
|
||||
return () -> clazz.cast(new eu.dnetlib.dhp.schema.oaf.Software());
|
||||
case "eu.dnetlib.dhp.schema.oaf.Person":
|
||||
return () -> clazz.cast(new eu.dnetlib.dhp.schema.oaf.Person());
|
||||
default:
|
||||
throw new RuntimeException("unknown class: " + clazz.getCanonicalName());
|
||||
}
|
||||
|
|
|
@ -50,7 +50,7 @@ public class PromoteActionPayloadFunctions {
|
|||
PromoteAction.Strategy promoteActionStrategy,
|
||||
Class<G> rowClazz,
|
||||
Class<A> actionPayloadClazz) {
|
||||
if (!isSubClass(rowClazz, actionPayloadClazz)) {
|
||||
if (Boolean.FALSE.equals(isSubClass(rowClazz, actionPayloadClazz))) {
|
||||
throw new RuntimeException(
|
||||
"action payload type must be the same or be a super type of table row type");
|
||||
}
|
||||
|
|
|
@ -77,7 +77,6 @@
|
|||
<switch>
|
||||
<case to="PromotePersonActionPayloadForPersonTable">
|
||||
${(activePromotePersonActionPayload eq "true") and
|
||||
(fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputGraphRootPath')),'/'),'person')) eq "true") and
|
||||
(fs:exists(concat(concat(concat(concat(wf:conf('nameNode'),'/'),wf:conf('inputActionPayloadRootPath')),'/'),'clazz=eu.dnetlib.dhp.schema.oaf.Person')) eq "true")}
|
||||
</case>
|
||||
<default to="SkipPromotePersonActionPayloadForPersonTable"/>
|
||||
|
|
2
pom.xml
2
pom.xml
|
@ -937,7 +937,7 @@
|
|||
<commons.logging.version>1.1.3</commons.logging.version>
|
||||
<commons-validator.version>1.7</commons-validator.version>
|
||||
<dateparser.version>1.0.7</dateparser.version>
|
||||
<dhp-schemas.version>[7.0.1]</dhp-schemas.version>
|
||||
<dhp-schemas.version>[7.0.2]</dhp-schemas.version>
|
||||
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
|
||||
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
|
||||
<dhp.guava.version>11.0.2</dhp.guava.version>
|
||||
|
|
Loading…
Reference in New Issue