forked from D-Net/dnet-hadoop
some fixes
This commit is contained in:
parent
834f139e6e
commit
4822747313
|
@ -29,8 +29,9 @@ import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultAggregator;
|
|||
import eu.dnetlib.dhp.broker.oa.util.aggregators.simple.ResultGroup;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.OpenaireBrokerResultAggregator;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedEntityFactory;
|
||||
import eu.dnetlib.dhp.broker.oa.util.aggregators.withRels.RelatedProject;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
|
||||
|
@ -83,9 +84,11 @@ public class GenerateEventsApplication {
|
|||
removeOutputDir(spark, eventsPath);
|
||||
|
||||
// TODO REMOVE THIS
|
||||
readPath(spark, graphPath + "/publication", Publication.class)
|
||||
.filter(r -> r.getDataInfo().getDeletedbyinference())
|
||||
.map(ConversionUtils::oafResultToBrokerResult, Encoders.bean(OpenaireBrokerResult.class))
|
||||
final Dataset<Project> projects = readPath(spark, graphPath + "/project", Project.class);
|
||||
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
|
||||
.cache();
|
||||
relatedEntities(projects, rels, RelatedProject.class)
|
||||
.write()
|
||||
.mode(SaveMode.Overwrite)
|
||||
.json(eventsPath);
|
||||
|
@ -129,7 +132,7 @@ public class GenerateEventsApplication {
|
|||
(MapFunction<Tuple2<OpenaireBrokerResult, Relation>, String>) t -> t._2.getTarget(), Encoders.STRING())
|
||||
.agg(aggr)
|
||||
.map((MapFunction<Tuple2<String, ResultGroup>, ResultGroup>) t -> t._2, Encoders.bean(ResultGroup.class))
|
||||
.filter(ResultGroup::isValid)
|
||||
.filter(rg -> rg.getData().size() > 1)
|
||||
.map(
|
||||
(MapFunction<ResultGroup, EventGroup>) g -> EventFinder.generateEvents(g, dedupConfig),
|
||||
Encoders.bean(EventGroup.class))
|
||||
|
@ -141,15 +144,15 @@ public class GenerateEventsApplication {
|
|||
final String graphPath,
|
||||
final Class<SRC> sourceClass) {
|
||||
|
||||
// final Dataset<Project> projects = readPath(spark, graphPath + "/project", Project.class);
|
||||
final Dataset<Project> projects = readPath(spark, graphPath + "/project", Project.class);
|
||||
// final Dataset<eu.dnetlib.dhp.schema.oaf.Dataset> datasets = readPath(
|
||||
// spark, graphPath + "/dataset", eu.dnetlib.dhp.schema.oaf.Dataset.class);
|
||||
// final Dataset<Software> softwares = readPath(spark, graphPath + "/software", Software.class);
|
||||
// final Dataset<Publication> publications = readPath(spark, graphPath + "/publication", Publication.class);
|
||||
|
||||
// final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
// .filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
|
||||
// .cache();
|
||||
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> !r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS))
|
||||
.cache();
|
||||
|
||||
final Dataset<OpenaireBrokerResult> r0 = readPath(
|
||||
spark, graphPath + "/" + sourceClass.getSimpleName().toLowerCase(), sourceClass)
|
||||
|
@ -157,8 +160,7 @@ public class GenerateEventsApplication {
|
|||
.map(ConversionUtils::oafResultToBrokerResult, Encoders.bean(OpenaireBrokerResult.class));
|
||||
|
||||
// TODO UNCOMMENT THIS
|
||||
// final Dataset<OpenaireBrokerResult> r1 = join(r0, rels, relatedEntities(projects, rels,
|
||||
// RelatedProject.class));
|
||||
final Dataset<OpenaireBrokerResult> r1 = join(r0, rels, relatedEntities(projects, rels, RelatedProject.class));
|
||||
// final Dataset<OpenaireBrokerResult> r2 = join(r1, rels, relatedEntities(softwares, rels,
|
||||
// RelatedSoftware.class));
|
||||
// final Dataset<OpenaireBrokerResult> r3 = join(r2, rels, relatedEntities(datasets, rels,
|
||||
|
|
|
@ -7,7 +7,6 @@ import java.util.Objects;
|
|||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.dom4j.Document;
|
||||
import org.dom4j.DocumentException;
|
||||
import org.dom4j.DocumentHelper;
|
||||
|
@ -59,10 +58,6 @@ public class ConversionUtils {
|
|||
return sp != null ? new TypedValue(classId(sp.getQualifier()), sp.getValue()) : null;
|
||||
}
|
||||
|
||||
/**
 * Converts a structured property into a pair made of the class id of its qualifier and its value.
 *
 * @param sp the structured property to convert; may be null
 * @return a pair (classId(sp.getQualifier()), sp.getValue()), or null when sp is null
 */
public static final Pair<String, String> oafSubjectToPair(final StructuredProperty sp) {
|
||||
return sp != null ? Pair.of(classId(sp.getQualifier()), sp.getValue()) : null;
|
||||
}
|
||||
|
||||
public static final eu.dnetlib.broker.objects.Dataset oafDatasetToBrokerDataset(final Dataset d) {
|
||||
if (d == null) {
|
||||
return null;
|
||||
|
@ -123,55 +118,6 @@ public class ConversionUtils {
|
|||
return res;
|
||||
}
|
||||
|
||||
/**
 * Maps every structured property of the list through
 * ConversionUtils::oafStructPropToBrokerTypedValue and collects the results.
 *
 * The mapped values are not filtered in this version, so any null produced by the
 * mapping function is kept in the returned list.
 *
 * @param list the properties to convert; may be null
 * @return a new list of converted values, or a new empty list when list is null
 */
private static List<TypedValue> structPropTypedList(final List<StructuredProperty> list) {
|
||||
if (list == null) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(ConversionUtils::oafStructPropToBrokerTypedValue)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
 * Applies func to every element of the list and collects the non-null results.
 *
 * @param list the source elements; may be null
 * @param func the mapping function applied to each element
 * @return a new list with the non-null mapped values, or a new empty list when list is null
 */
private static <F, T> List<T> mappedList(final List<F> list, final Function<F, T> func) {
|
||||
if (list == null) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(func::apply)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
 * Applies func to every element of the list, concatenates the lists it returns and
 * keeps only the non-null items.
 *
 * NOTE(review): the null filter runs after the flattening step, so it removes null
 * items inside the returned lists; a null list returned by func is not guarded and
 * would fail at List::stream — confirm func never returns null.
 *
 * @param list the source elements; may be null
 * @param func the function producing a list of values for each element
 * @return a new flattened list of non-null values, or a new empty list when list is null
 */
private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) {
|
||||
if (list == null) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(func::apply)
|
||||
.flatMap(List::stream)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
 * Applies func to the elements of the list and returns the first non-null result.
 *
 * @param list the source elements; may be null
 * @param func the mapping function applied to each element
 * @return the first non-null mapped value, or null when list is null or no mapped value is non-null
 */
private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) {
|
||||
if (list == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(func::apply)
|
||||
.filter(Objects::nonNull)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
private static eu.dnetlib.broker.objects.Author oafAuthorToBrokerAuthor(final Author author) {
|
||||
if (author == null) {
|
||||
return null;
|
||||
|
@ -300,4 +246,55 @@ public class ConversionUtils {
|
|||
.collect(Collectors.toList())
|
||||
: new ArrayList<>();
|
||||
}
|
||||
|
||||
/**
 * Maps every structured property of the list through
 * ConversionUtils::oafStructPropToBrokerTypedValue and collects the non-null results.
 *
 * @param list the properties to convert; may be null
 * @return a new list of non-null converted values, or a new empty list when list is null
 */
private static List<TypedValue> structPropTypedList(final List<StructuredProperty> list) {
|
||||
if (list == null) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(ConversionUtils::oafStructPropToBrokerTypedValue)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
 * Applies func to every element of the list and collects the non-null results.
 *
 * @param list the source elements; may be null
 * @param func the mapping function applied to each element
 * @return a new list with the non-null mapped values, or a new empty list when list is null
 */
private static <F, T> List<T> mappedList(final List<F> list, final Function<F, T> func) {
|
||||
if (list == null) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(func::apply)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
 * Applies func to every element of the list, concatenates the lists it returns and
 * keeps only the non-null items.
 *
 * NOTE(review): the null filter runs after the flattening step, so it removes null
 * items inside the returned lists; a null list returned by func is not guarded and
 * would fail at List::stream — confirm func never returns null.
 *
 * @param list the source elements; may be null
 * @param func the function producing a list of values for each element
 * @return a new flattened list of non-null values, or a new empty list when list is null
 */
private static <F, T> List<T> flatMappedList(final List<F> list, final Function<F, List<T>> func) {
|
||||
if (list == null) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(func::apply)
|
||||
.flatMap(List::stream)
|
||||
.filter(Objects::nonNull)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
/**
 * Applies func to the elements of the list and returns the first non-null result.
 *
 * @param list the source elements; may be null
 * @param func the mapping function applied to each element
 * @return the first non-null mapped value, or null when list is null or no mapped value is non-null
 */
private static <F, T> T mappedFirst(final List<F> list, final Function<F, T> func) {
|
||||
if (list == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return list
|
||||
.stream()
|
||||
.map(func::apply)
|
||||
.filter(Objects::nonNull)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -23,12 +23,14 @@ public class ResultAggregator extends Aggregator<Tuple2<OpenaireBrokerResult, Re
|
|||
|
||||
@Override
|
||||
public ResultGroup reduce(final ResultGroup group, final Tuple2<OpenaireBrokerResult, Relation> t) {
|
||||
return group.addElement(t._1);
|
||||
group.getData().add(t._1);
|
||||
return group;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultGroup merge(final ResultGroup g1, final ResultGroup g2) {
|
||||
return g1.addGroup(g2);
|
||||
g1.getData().addAll(g2.getData());
|
||||
return g1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -14,23 +14,14 @@ public class ResultGroup implements Serializable {
|
|||
*/
|
||||
private static final long serialVersionUID = -3360828477088669296L;
|
||||
|
||||
private final List<OpenaireBrokerResult> data = new ArrayList<>();
|
||||
private List<OpenaireBrokerResult> data = new ArrayList<>();
|
||||
|
||||
/**
 * Returns the results held by this group.
 *
 * @return the internal list itself (not a copy), so changes made through it affect this group
 */
public List<OpenaireBrokerResult> getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
public ResultGroup addElement(final OpenaireBrokerResult elem) {
|
||||
data.add(elem);
|
||||
return this;
|
||||
public void setData(final List<OpenaireBrokerResult> data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
/**
 * Appends all the results of another group to this group.
 *
 * @param group the group whose results are added; its own list is read via getData()
 * @return this group, to allow chained calls
 */
public ResultGroup addGroup(final ResultGroup group) {
|
||||
data.addAll(group.getData());
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Tells whether this group holds more than one result.
 *
 * @return true when the data list contains at least two elements
 */
public boolean isValid() {
|
||||
return data.size() > 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,9 +11,12 @@ public class RelatedDataset implements Serializable {
|
|||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 774487705184038324L;
|
||||
private final String source;
|
||||
private final String relType;
|
||||
private final Dataset relDataset;
|
||||
private String source;
|
||||
private String relType;
|
||||
private Dataset relDataset;
|
||||
|
||||
/**
 * No-argument constructor: assigns nothing, so every field keeps its default value
 * until the corresponding setter is called.
 *
 * NOTE(review): presumably required so the class can be instantiated as a bean
 * (e.g. by a bean-based encoder) — confirm.
 */
public RelatedDataset() {
|
||||
}
|
||||
|
||||
public RelatedDataset(final String source, final String relType, final Dataset relDataset) {
|
||||
this.source = source;
|
||||
|
@ -25,12 +28,24 @@ public class RelatedDataset implements Serializable {
|
|||
return source;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code source} field.
 *
 * @param source the new value
 */
public void setSource(final String source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relType} field.
 *
 * @return the current value, as stored
 */
public String getRelType() {
|
||||
return relType;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relType} field.
 *
 * @param relType the new value
 */
public void setRelType(final String relType) {
|
||||
this.relType = relType;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relDataset} field.
 *
 * @return the stored dataset reference (not a copy)
 */
public Dataset getRelDataset() {
|
||||
return relDataset;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relDataset} field.
 *
 * @param relDataset the new value
 */
public void setRelDataset(final Dataset relDataset) {
|
||||
this.relDataset = relDataset;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -12,9 +12,12 @@ public class RelatedProject implements Serializable {
|
|||
*/
|
||||
private static final long serialVersionUID = 4941437626549329870L;
|
||||
|
||||
private final String source;
|
||||
private final String relType;
|
||||
private final Project relProject;
|
||||
private String source;
|
||||
private String relType;
|
||||
private Project relProject;
|
||||
|
||||
/**
 * No-argument constructor: assigns nothing, so every field keeps its default value
 * until the corresponding setter is called.
 *
 * NOTE(review): presumably required so the class can be instantiated as a bean
 * (e.g. by a bean-based encoder) — confirm.
 */
public RelatedProject() {
|
||||
}
|
||||
|
||||
public RelatedProject(final String source, final String relType, final Project relProject) {
|
||||
this.source = source;
|
||||
|
@ -26,12 +29,24 @@ public class RelatedProject implements Serializable {
|
|||
return source;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code source} field.
 *
 * @param source the new value
 */
public void setSource(final String source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relType} field.
 *
 * @return the current value, as stored
 */
public String getRelType() {
|
||||
return relType;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relType} field.
 *
 * @param relType the new value
 */
public void setRelType(final String relType) {
|
||||
this.relType = relType;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relProject} field.
 *
 * @return the stored project reference (not a copy)
 */
public Project getRelProject() {
|
||||
return relProject;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relProject} field.
 *
 * @param relProject the new value
 */
public void setRelProject(final Project relProject) {
|
||||
this.relProject = relProject;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -12,9 +12,12 @@ public class RelatedPublication implements Serializable {
|
|||
*/
|
||||
private static final long serialVersionUID = 9021609640411395128L;
|
||||
|
||||
private final String source;
|
||||
private final String relType;
|
||||
private final Publication relPublication;
|
||||
private String source;
|
||||
private String relType;
|
||||
private Publication relPublication;
|
||||
|
||||
/**
 * No-argument constructor: assigns nothing, so every field keeps its default value
 * until the corresponding setter is called.
 *
 * NOTE(review): presumably required so the class can be instantiated as a bean
 * (e.g. by a bean-based encoder) — confirm.
 */
public RelatedPublication() {
|
||||
}
|
||||
|
||||
public RelatedPublication(final String source, final String relType, final Publication relPublication) {
|
||||
this.source = source;
|
||||
|
@ -26,12 +29,24 @@ public class RelatedPublication implements Serializable {
|
|||
return source;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code source} field.
 *
 * @param source the new value
 */
public void setSource(final String source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relType} field.
 *
 * @return the current value, as stored
 */
public String getRelType() {
|
||||
return relType;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relType} field.
 *
 * @param relType the new value
 */
public void setRelType(final String relType) {
|
||||
this.relType = relType;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relPublication} field.
 *
 * @return the stored publication reference (not a copy)
 */
public Publication getRelPublication() {
|
||||
return relPublication;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relPublication} field.
 *
 * @param relPublication the new value
 */
public void setRelPublication(final Publication relPublication) {
|
||||
this.relPublication = relPublication;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -11,9 +11,12 @@ public class RelatedSoftware implements Serializable {
|
|||
*
|
||||
*/
|
||||
private static final long serialVersionUID = 7573383356943300157L;
|
||||
private final String source;
|
||||
private final String relType;
|
||||
private final Software relSoftware;
|
||||
private String source;
|
||||
private String relType;
|
||||
private Software relSoftware;
|
||||
|
||||
/**
 * No-argument constructor: assigns nothing, so every field keeps its default value
 * until the corresponding setter is called.
 *
 * NOTE(review): presumably required so the class can be instantiated as a bean
 * (e.g. by a bean-based encoder) — confirm.
 */
public RelatedSoftware() {
|
||||
}
|
||||
|
||||
public RelatedSoftware(final String source, final String relType, final Software relSoftware) {
|
||||
this.source = source;
|
||||
|
@ -25,12 +28,24 @@ public class RelatedSoftware implements Serializable {
|
|||
return source;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code source} field.
 *
 * @param source the new value
 */
public void setSource(final String source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relType} field.
 *
 * @return the current value, as stored
 */
public String getRelType() {
|
||||
return relType;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relType} field.
 *
 * @param relType the new value
 */
public void setRelType(final String relType) {
|
||||
this.relType = relType;
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code relSoftware} field.
 *
 * @return the stored software reference (not a copy)
 */
public Software getRelSoftware() {
|
||||
return relSoftware;
|
||||
}
|
||||
|
||||
/**
 * Sets the {@code relSoftware} field.
 *
 * @param relSoftware the new value
 */
public void setRelSoftware(final Software relSoftware) {
|
||||
this.relSoftware = relSoftware;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue