forked from D-Net/dnet-hadoop
partial refactoring
This commit is contained in:
parent
5e23fb3a74
commit
97177d7f7b
|
@ -57,7 +57,6 @@ import eu.dnetlib.dhp.broker.oa.matchers.simple.EnrichMoreSubject;
|
|||
import eu.dnetlib.dhp.broker.oa.util.BrokerConstants;
|
||||
import eu.dnetlib.dhp.broker.oa.util.UpdateInfo;
|
||||
import eu.dnetlib.dhp.common.HdfsSupport;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
|
@ -87,25 +86,32 @@ public class GenerateEventsApplication {
|
|||
private static final UpdateMatcher<Pair<Result, List<Software>>, ?> enrichMoreSoftware = new EnrichMoreSoftware();
|
||||
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMisissingPublicationIsRelatedTo = new EnrichMissingPublicationIsRelatedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy = new EnrichMissingPublicationIsReferencedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsReferencedBy =
|
||||
new EnrichMissingPublicationIsReferencedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationReferences = new EnrichMissingPublicationReferences();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo = new EnrichMissingPublicationIsSupplementedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy = new EnrichMissingPublicationIsSupplementedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedTo =
|
||||
new EnrichMissingPublicationIsSupplementedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<Publication>>, ?> enrichMissingPublicationIsSupplementedBy =
|
||||
new EnrichMissingPublicationIsSupplementedBy();
|
||||
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo = new EnrichMissingDatasetIsRelatedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy = new EnrichMissingDatasetIsReferencedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences = new EnrichMissingDatasetReferences();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo = new EnrichMissingDatasetIsSupplementedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy = new EnrichMissingDatasetIsSupplementedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMisissingDatasetIsRelatedTo =
|
||||
new EnrichMissingDatasetIsRelatedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsReferencedBy =
|
||||
new EnrichMissingDatasetIsReferencedBy();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetReferences =
|
||||
new EnrichMissingDatasetReferences();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedTo =
|
||||
new EnrichMissingDatasetIsSupplementedTo();
|
||||
private static final UpdateMatcher<Pair<Result, List<eu.dnetlib.dhp.schema.oaf.Dataset>>, ?> enrichMissingDatasetIsSupplementedBy =
|
||||
new EnrichMissingDatasetIsSupplementedBy();
|
||||
|
||||
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils
|
||||
.toString(
|
||||
GenerateEventsApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
|
||||
.toString(GenerateEventsApplication.class
|
||||
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/merge_claims_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
|
||||
final Boolean isSparkSessionManaged = Optional
|
||||
|
@ -128,10 +134,13 @@ public class GenerateEventsApplication {
|
|||
|
||||
final JavaRDD<Event> eventsRdd = sc.emptyRDD();
|
||||
|
||||
eventsRdd.union(generateSimpleEvents(spark, graphPath, Publication.class));
|
||||
eventsRdd.union(generateSimpleEvents(spark, graphPath, eu.dnetlib.dhp.schema.oaf.Dataset.class));
|
||||
eventsRdd.union(generateSimpleEvents(spark, graphPath, Software.class));
|
||||
eventsRdd.union(generateSimpleEvents(spark, graphPath, OtherResearchProduct.class));
|
||||
for (final Class<? extends Result> r1 : BrokerConstants.RESULT_CLASSES) {
|
||||
eventsRdd.union(generateSimpleEvents(spark, graphPath, r1));
|
||||
|
||||
for (final Class<? extends Result> r2 : BrokerConstants.RESULT_CLASSES) {
|
||||
eventsRdd.union(generateRelationEvents(spark, graphPath, r1, r2));
|
||||
}
|
||||
}
|
||||
|
||||
eventsRdd.saveAsTextFile(eventsPath, GzipCodec.class);
|
||||
});
|
||||
|
@ -146,9 +155,8 @@ public class GenerateEventsApplication {
|
|||
final String graphPath,
|
||||
final Class<R> resultClazz) {
|
||||
|
||||
final Dataset<R> results = readPath(
|
||||
spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz)
|
||||
.filter(r -> r.getDataInfo().getDeletedbyinference());
|
||||
final Dataset<R> results = readPath(spark, graphPath + "/" + resultClazz.getSimpleName().toLowerCase(), resultClazz)
|
||||
.filter(r -> r.getDataInfo().getDeletedbyinference());
|
||||
|
||||
final Dataset<Relation> rels = readPath(spark, graphPath + "/relation", Relation.class)
|
||||
.filter(r -> r.getRelClass().equals(BrokerConstants.IS_MERGED_IN_CLASS));
|
||||
|
@ -169,6 +177,14 @@ public class GenerateEventsApplication {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Placeholder for generating events from relations between two result types.
 *
 * Currently unimplemented: none of the arguments are read and the method
 * always returns {@code null}.
 *
 * NOTE(review): the visible caller in {@code main} passes this return value
 * straight to {@code eventsRdd.union(...)}; confirm that a null argument is
 * acceptable there, or return an empty RDD until the real logic is written.
 *
 * @param spark       the Spark session (unused by the stub)
 * @param graphPath   presumably the base path of the graph, as in the sibling
 *                    generateSimpleEvents overload; unused by the stub - TODO confirm
 * @param sourceClass class of the relation source results (unused by the stub)
 * @param targetClass class of the relation target results (unused by the stub)
 * @return always {@code null} in the current implementation
 */
private static <SRC extends Result, TRG extends Result> JavaRDD<Event> generateRelationEvents(final SparkSession spark,
|
||||
final String graphPath,
|
||||
final Class<SRC> sourceClass,
|
||||
final Class<TRG> targetClass) {
|
||||
// TODO: not implemented yet (IDE-generated stub); no events are produced.
|
||||
return null;
|
||||
}
|
||||
|
||||
private List<Event> generateSimpleEvents(final Collection<Result> children) {
|
||||
final List<UpdateInfo<?>> list = new ArrayList<>();
|
||||
|
||||
|
|
|
@ -1,9 +1,21 @@
|
|||
|
||||
package eu.dnetlib.dhp.broker.oa.util;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Result;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
||||
/**
 * Shared constants for the broker event-generation code.
 *
 * NOTE(review): this text is a rendered diff, so the removed ({@code final static})
 * and added ({@code static final}) spellings of the same two constants both
 * appear below; only one pair can exist in the real file - confirm against
 * the repository before editing.
 */
public class BrokerConstants {
|
||||
|
||||
// String value "OPEN"; presumably an access-right code - TODO confirm with callers.
public final static String OPEN_ACCESS = "OPEN";
|
||||
// Relation class name "isMergedIn"; compared against Relation.getRelClass() when filtering relations.
public final static String IS_MERGED_IN_CLASS = "isMergedIn";
|
||||
// Same constant as above with the modifiers in the conventional "static final" order.
public static final String OPEN_ACCESS = "OPEN";
|
||||
// Same constant as above with the modifiers in the conventional "static final" order.
public static final String IS_MERGED_IN_CLASS = "isMergedIn";
|
||||
|
||||
// The four Result subclasses (Publication, Dataset, Software, OtherResearchProduct),
// in a fixed-size list created by Arrays.asList.
public static final List<Class<? extends Result>> RESULT_CLASSES =
|
||||
Arrays.asList(Publication.class, Dataset.class, Software.class, OtherResearchProduct.class);
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue