code formatting

Claudio Atzori 2020-05-11 17:38:08 +02:00
parent 6d0b11252e
commit c6b028f2af
25 changed files with 491 additions and 476 deletions

View File

@@ -1,3 +1,4 @@
package eu.dnetlib.dhp.schema.common;
+
import java.util.Map;
@@ -65,216 +66,216 @@ public class ModelSupport {

    static {
        relationInverseMap
            .put(
                "personResult_authorship_isAuthorOf", new RelationInverse()
                    .setRelation("isAuthorOf")
                    .setInverse("hasAuthor")
                    .setRelType("personResult")
                    .setSubReltype("authorship"));
        relationInverseMap
            .put(
                "personResult_authorship_hasAuthor", new RelationInverse()
                    .setInverse("isAuthorOf")
                    .setRelation("hasAuthor")
                    .setRelType("personResult")
                    .setSubReltype("authorship"));
        relationInverseMap
            .put(
                "projectOrganization_participation_isParticipant", new RelationInverse()
                    .setRelation("isParticipant")
                    .setInverse("hasParticipant")
                    .setRelType("projectOrganization")
                    .setSubReltype("participation"));
        relationInverseMap
            .put(
                "projectOrganization_participation_hasParticipant", new RelationInverse()
                    .setInverse("isParticipant")
                    .setRelation("hasParticipant")
                    .setRelType("projectOrganization")
                    .setSubReltype("participation"));
        relationInverseMap
            .put(
                "resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse()
                    .setRelation("hasAuthorInstitution")
                    .setInverse("isAuthorInstitutionOf")
                    .setRelType("resultOrganization")
                    .setSubReltype("affiliation"));
        relationInverseMap
            .put(
                "resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse()
                    .setInverse("hasAuthorInstitution")
                    .setRelation("isAuthorInstitutionOf")
                    .setRelType("resultOrganization")
                    .setSubReltype("affiliation"));
        relationInverseMap
            .put(
                "organizationOrganization_dedup_merges", new RelationInverse()
                    .setRelation("merges")
                    .setInverse("isMergedIn")
                    .setRelType("organizationOrganization")
                    .setSubReltype("dedup"));
        relationInverseMap
            .put(
                "organizationOrganization_dedup_isMergedIn", new RelationInverse()
                    .setInverse("merges")
                    .setRelation("isMergedIn")
                    .setRelType("organizationOrganization")
                    .setSubReltype("dedup"));
        relationInverseMap
            .put(
                "organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse()
                    .setInverse("isSimilarTo")
                    .setRelation("isSimilarTo")
                    .setRelType("organizationOrganization")
                    .setSubReltype("dedupSimilarity"));
        relationInverseMap
            .put(
                "resultProject_outcome_isProducedBy", new RelationInverse()
                    .setRelation("isProducedBy")
                    .setInverse("produces")
                    .setRelType("resultProject")
                    .setSubReltype("outcome"));
        relationInverseMap
            .put(
                "resultProject_outcome_produces", new RelationInverse()
                    .setInverse("isProducedBy")
                    .setRelation("produces")
                    .setRelType("resultProject")
                    .setSubReltype("outcome"));
        relationInverseMap
            .put(
                "projectPerson_contactPerson_isContact", new RelationInverse()
                    .setRelation("isContact")
                    .setInverse("hasContact")
                    .setRelType("projectPerson")
                    .setSubReltype("contactPerson"));
        relationInverseMap
            .put(
                "projectPerson_contactPerson_hasContact", new RelationInverse()
                    .setInverse("isContact")
                    .setRelation("hasContact")
                    .setRelType("personPerson")
                    .setSubReltype("coAuthorship"));
        relationInverseMap
            .put(
                "personPerson_coAuthorship_isCoauthorOf", new RelationInverse()
                    .setInverse("isCoAuthorOf")
                    .setRelation("isCoAuthorOf")
                    .setRelType("personPerson")
                    .setSubReltype("coAuthorship"));
        relationInverseMap
            .put(
                "personPerson_dedup_merges", new RelationInverse()
                    .setInverse("isMergedIn")
                    .setRelation("merges")
                    .setRelType("personPerson")
                    .setSubReltype("dedup"));
        relationInverseMap
            .put(
                "personPerson_dedup_isMergedIn", new RelationInverse()
                    .setInverse("merges")
                    .setRelation("isMergedIn")
                    .setRelType("personPerson")
                    .setSubReltype("dedup"));
        relationInverseMap
            .put(
                "personPerson_dedupSimilarity_isSimilarTo", new RelationInverse()
                    .setInverse("isSimilarTo")
                    .setRelation("isSimilarTo")
                    .setRelType("personPerson")
                    .setSubReltype("dedupSimilarity"));
        relationInverseMap
            .put(
                "datasourceOrganization_provision_isProvidedBy", new RelationInverse()
                    .setInverse("provides")
                    .setRelation("isProvidedBy")
                    .setRelType("datasourceOrganization")
                    .setSubReltype("provision"));
        relationInverseMap
            .put(
                "datasourceOrganization_provision_provides", new RelationInverse()
                    .setInverse("isProvidedBy")
                    .setRelation("provides")
                    .setRelType("datasourceOrganization")
                    .setSubReltype("provision"));
        relationInverseMap
            .put(
                "resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse()
                    .setInverse("isAmongTopNSimilarDocuments")
                    .setRelation("hasAmongTopNSimilarDocuments")
                    .setRelType("resultResult")
                    .setSubReltype("similarity"));
        relationInverseMap
            .put(
                "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse()
                    .setInverse("hasAmongTopNSimilarDocuments")
                    .setRelation("isAmongTopNSimilarDocuments")
                    .setRelType("resultResult")
                    .setSubReltype("similarity"));
        relationInverseMap
            .put(
                "resultResult_relationship_isRelatedTo", new RelationInverse()
                    .setInverse("isRelatedTo")
                    .setRelation("isRelatedTo")
                    .setRelType("resultResult")
                    .setSubReltype("relationship"));
        relationInverseMap
            .put(
                "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse()
                    .setInverse("hasAmongTopNSimilarDocuments")
                    .setRelation("isAmongTopNSimilarDocuments")
                    .setRelType("resultResult")
                    .setSubReltype("similarity"));
        relationInverseMap
            .put(
                "resultResult_supplement_isSupplementTo", new RelationInverse()
                    .setInverse("isSupplementedBy")
                    .setRelation("isSupplementTo")
                    .setRelType("resultResult")
                    .setSubReltype("supplement"));
        relationInverseMap
            .put(
                "resultResult_supplement_isSupplementedBy", new RelationInverse()
                    .setInverse("isSupplementTo")
                    .setRelation("isSupplementedBy")
                    .setRelType("resultResult")
                    .setSubReltype("supplement"));
        relationInverseMap
            .put(
                "resultResult_part_isPartOf", new RelationInverse()
                    .setInverse("hasPart")
                    .setRelation("isPartOf")
                    .setRelType("resultResult")
                    .setSubReltype("part"));
        relationInverseMap
            .put(
                "resultResult_part_hasPart", new RelationInverse()
                    .setInverse("isPartOf")
                    .setRelation("hasPart")
                    .setRelType("resultResult")
                    .setSubReltype("part"));
        relationInverseMap
            .put(
                "resultResult_dedup_merges", new RelationInverse()
                    .setInverse("isMergedIn")
                    .setRelation("merges")
                    .setRelType("resultResult")
                    .setSubReltype("dedup"));
        relationInverseMap
            .put(
                "resultResult_dedup_isMergedIn", new RelationInverse()
                    .setInverse("merges")
                    .setRelation("isMergedIn")
                    .setRelType("resultResult")
                    .setSubReltype("dedup"));
        relationInverseMap
            .put(
                "resultResult_dedupSimilarity_isSimilarTo", new RelationInverse()
                    .setInverse("isSimilarTo")
                    .setRelation("isSimilarTo")
                    .setRelType("resultResult")
                    .setSubReltype("dedupSimilarity"));
    }
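
The map above is keyed by strings of the form relType_subRelType_relClass. A minimal lookup sketch, assuming relationInverseMap is accessible from callers and that RelationInverse exposes getters mirroring the fluent setters shown here (neither is confirmed by this diff):

    // resolve the inverse of an "isProducedBy" relation
    RelationInverse ri = ModelSupport.relationInverseMap.get("resultProject_outcome_isProducedBy");
    String inverse = ri.getInverse();       // "produces"
    String relType = ri.getRelType();       // "resultProject"
    String subRelType = ri.getSubReltype(); // "outcome"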
@@ -293,7 +294,7 @@ public class ModelSupport {
     * @return True if X is a subclass of Y
     */
    public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
        X subClazzObject, Y superClazzObject) {
        return isSubClass(subClazzObject.getClass(), superClazzObject.getClass());
    }

@@ -307,7 +308,7 @@ public class ModelSupport {
     * @return True if X is a subclass of Y
     */
    public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
        X subClazzObject, Class<Y> superClazz) {
        return isSubClass(subClazzObject.getClass(), superClazz);
    }

@@ -321,7 +322,7 @@ public class ModelSupport {
     * @return True if X is a subclass of Y
     */
    public static <X extends Oaf, Y extends Oaf> Boolean isSubClass(
        Class<X> subClazz, Class<Y> superClazz) {
        return superClazz.isAssignableFrom(subClazz);
    }
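
The three overloads above all funnel into the Class-based variant, which reduces to Class.isAssignableFrom. A hedged usage sketch, assuming Publication extends Result in this model:

    // both calls ask "is Publication a subtype of Result?"
    Boolean viaClasses = ModelSupport.isSubClass(Publication.class, Result.class); // expected: true
    Boolean viaInstance = ModelSupport.isSubClass(new Publication(), Result.class); // expected: true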
@@ -333,32 +334,32 @@ public class ModelSupport {
     */
    public static <T extends Oaf> Class<T>[] getOafModelClasses() {
        return new Class[] {
            Author.class,
            Context.class,
            Country.class,
            DataInfo.class,
            Dataset.class,
            Datasource.class,
            ExternalReference.class,
            ExtraInfo.class,
            Field.class,
            GeoLocation.class,
            Instance.class,
            Journal.class,
            KeyValue.class,
            Oaf.class,
            OafEntity.class,
            OAIProvenance.class,
            Organization.class,
            OriginDescription.class,
            OtherResearchProduct.class,
            Project.class,
            Publication.class,
            Qualifier.class,
            Relation.class,
            Result.class,
            Software.class,
            StructuredProperty.class
        };
    }
@@ -372,10 +373,10 @@ public class ModelSupport {
    public static String getScheme(final String sourceType, final String targetType) {
        return String
            .format(
                schemeTemplate,
                entityMapping.get(EntityType.valueOf(sourceType)).name(),
                entityMapping.get(EntityType.valueOf(targetType)).name());
    }

    public static <T extends Oaf> Function<T, String> idFn() {
@@ -390,38 +391,38 @@ public class ModelSupport {
    private static <T extends Oaf> String idFnForRelation(T t) {
        Relation r = (Relation) t;
        return Optional
            .ofNullable(r.getSource())
            .map(
                source -> Optional
                    .ofNullable(r.getTarget())
                    .map(
                        target -> Optional
                            .ofNullable(r.getRelType())
                            .map(
                                relType -> Optional
                                    .ofNullable(r.getSubRelType())
                                    .map(
                                        subRelType -> Optional
                                            .ofNullable(r.getRelClass())
                                            .map(
                                                relClass -> String
                                                    .join(
                                                        source,
                                                        target,
                                                        relType,
                                                        subRelType,
                                                        relClass))
                                            .orElse(
                                                String
                                                    .join(
                                                        source,
                                                        target,
                                                        relType,
                                                        subRelType)))
                                    .orElse(String.join(source, target, relType)))
                            .orElse(String.join(source, target)))
                    .orElse(source))
            .orElse(null);
    }
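
A reading aid for the nested Optionals above: each level falls back to a shorter join as soon as the next field is null. Note that String.join takes the delimiter as its first argument, so these calls separate the remaining fields with the source value itself:

    // String.join(delimiter, elements...) applied to the block above yields:
    // all fields present -> target + source + relType + source + subRelType + source + relClass
    // relClass == null   -> target + source + relType + source + subRelType
    // subRelType == null -> target + source + relType
    // relType == null    -> target
    // target == null     -> source
    // source == null     -> null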
    private static <T extends Oaf> String idFnForOafEntity(T t) {

View File

@@ -73,19 +73,6 @@ public class PrepareMergedRelationJob {
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
-//        relation.createOrReplaceTempView("relation");
-//
-//        spark
-//            .sql(
-//                "Select * from relation " +
-//                    "where relclass = 'merges' " +
-//                    "and datainfo.deletedbyinference = false")
-//            .as(Encoders.bean(Relation.class))
-//            .toJSON()
-//            .write()
-//            .mode(SaveMode.Overwrite)
-//            .option("compression", "gzip")
-//            .text(outputPath);
    }

    public static org.apache.spark.sql.Dataset<Relation> readRelations(

View File

@@ -65,8 +65,7 @@ public class ReadBlacklistFromDB implements Closeable {
        }
    }

-    public void execute(final String sql, final Function<ResultSet, List<Relation>> producer)
-        throws Exception {
+    public void execute(final String sql, final Function<ResultSet, List<Relation>> producer) throws Exception {

        final Consumer<ResultSet> consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r));

View File

@@ -84,7 +84,7 @@ public class SparkRemoveBlacklistedRelationJob {
            .joinWith(
                mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")),
                "left_outer")
-            .map(c -> {
+            .map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
                Optional
                    .ofNullable(c._2())
                    .ifPresent(mr -> c._1().setSource(mr.getSource()));

@@ -95,7 +95,7 @@ public class SparkRemoveBlacklistedRelationJob {
            .joinWith(
                mergesRelation, dedupSource.col("target").equalTo(mergesRelation.col("target")),
                "left_outer")
-            .map(c -> {
+            .map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
                Optional
                    .ofNullable(c._2())
                    .ifPresent(mr -> c._1().setTarget(mr.getSource()));

@@ -107,7 +107,6 @@ public class SparkRemoveBlacklistedRelationJob {
            .mode(SaveMode.Overwrite)
            .json(blacklistPath + "/deduped");
-
        inputRelation
            .joinWith(
                dedupBL, (inputRelation

@@ -118,26 +117,23 @@ public class SparkRemoveBlacklistedRelationJob {
                    .col("target")
                    .equalTo(dedupBL.col("target")))),
                "left_outer")
-            .map(c -> {
+            .map((MapFunction<Tuple2<Relation, Relation>, Relation>) c -> {
                Relation ir = c._1();
                Optional<Relation> obl = Optional.ofNullable(c._2());
                if (obl.isPresent()) {
                    if (ir.equals(obl.get())) {
                        return null;
                    }
                }
                return ir;
            }, Encoders.bean(Relation.class))
            .filter(Objects::nonNull)
            .write()
            .mode(SaveMode.Overwrite)
            .option("compression", "gzip")
            .json(outputPath);
    }

    public static org.apache.spark.sql.Dataset<Relation> readRelations(
        SparkSession spark, String inputPath) {
        return spark
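
A note on the casts introduced in this file: Dataset.map is overloaded for Spark's Java and Scala APIs, so a bare lambda is ambiguous to javac; the (MapFunction<...>) cast selects the Java overload map(MapFunction<T, U>, Encoder<U>). A self-contained sketch of the pattern (illustrative only, not part of this commit):

    import org.apache.spark.api.java.function.MapFunction;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Encoders;

    static Dataset<Relation> passthrough(Dataset<Relation> rels) {
        return rels.map(
            (MapFunction<Relation, Relation>) r -> r, // identity mapper, for illustration
            Encoders.bean(Relation.class));
    }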

View File

@@ -19,6 +19,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.ObjectMapper;
+
import eu.dnetlib.dhp.schema.oaf.Relation;

public class BlackListTest {

View File

@@ -1,11 +1,10 @@
package eu.dnetlib.dhp.bulktag;

-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.gson.Gson;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.bulktag.community.*;
-import eu.dnetlib.dhp.schema.oaf.Result;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+
+import java.util.Optional;
+
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.function.MapFunction;

@@ -16,9 +15,12 @@ import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.util.Optional;
-
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.bulktag.community.*;
+import eu.dnetlib.dhp.schema.oaf.Result;

public class SparkBulkTagJob {

View File

@@ -1,14 +1,15 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.gson.Gson;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import com.google.gson.Gson;

/** Created by miriam on 01/08/2018. */
public class Community implements Serializable {

View File

@@ -1,15 +1,6 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
-import eu.dnetlib.dhp.bulktag.criteria.Selection;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;

@@ -17,6 +8,17 @@ import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+
+import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
+import eu.dnetlib.dhp.bulktag.criteria.Selection;

/** Created by miriam on 02/08/2018. */
public class CommunityConfiguration implements Serializable {

View File

@@ -1,14 +1,11 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
-import eu.dnetlib.dhp.bulktag.criteria.Selection;
-import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
-import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

@@ -17,10 +14,15 @@ import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+
+import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
+import eu.dnetlib.dhp.bulktag.criteria.Selection;
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;

/** Created by miriam on 03/08/2018. */
public class CommunityConfigurationFactory {

View File

@@ -1,12 +1,12 @@
package eu.dnetlib.dhp.bulktag.community;

-import eu.dnetlib.dhp.bulktag.criteria.Selection;
-import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;

+import eu.dnetlib.dhp.bulktag.criteria.Selection;
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

public class Constraint implements Serializable {
    private String verb;
    private String field;

View File

@@ -1,12 +1,6 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.gson.Gson;
-import com.google.gson.reflect.TypeToken;
-import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Type;

@@ -14,6 +8,14 @@ import java.util.Collection;
import java.util.List;
import java.util.Map;

+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import com.google.gson.Gson;
+import com.google.gson.reflect.TypeToken;
+
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

/** Created by miriam on 02/08/2018. */
public class Constraints implements Serializable {
    private static final Log log = LogFactory.getLog(Constraints.class);

View File

@@ -1,10 +1,10 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.gson.Gson;
import java.io.Serializable;

+import com.google.gson.Gson;

/** Created by miriam on 03/08/2018. */
public class Pair<A, B> implements Serializable {
    private A fst;

View File

@@ -1,13 +1,15 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.gson.Gson;
-import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
+import java.io.Serializable;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Node;

-import java.io.Serializable;
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

/** Created by miriam on 01/08/2018. */
public class Provider implements Serializable {

View File

@@ -1,13 +1,15 @@
package eu.dnetlib.dhp.bulktag.community;

+import java.util.List;
+
+import org.dom4j.DocumentException;
+
import com.google.common.base.Joiner;

import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
-import org.dom4j.DocumentException;
-import java.util.List;

public class QueryInformationSystem {
    private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') "

View File

@@ -1,19 +1,21 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.gson.Gson;
-import com.jayway.jsonpath.DocumentContext;
-import com.jayway.jsonpath.JsonPath;
-import eu.dnetlib.dhp.schema.oaf.*;
-import org.apache.commons.lang3.StringUtils;
+import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+
import java.io.Serializable;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

-import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+import org.apache.commons.lang3.StringUtils;
+
+import com.google.gson.Gson;
+import com.jayway.jsonpath.DocumentContext;
+import com.jayway.jsonpath.JsonPath;
+
+import eu.dnetlib.dhp.schema.oaf.*;

/** Created by miriam on 02/08/2018. */
public class ResultTagger implements Serializable {

@@ -50,7 +52,7 @@ public class ResultTagger implements Serializable {
    }

    public <R extends Result> R enrichContextCriteria(
        final R result, final CommunityConfiguration conf, final Map<String, String> criteria) {

//    }
//    public Result enrichContextCriteria(final Result result, final CommunityConfiguration

View File

@@ -1,16 +1,17 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.gson.Gson;
-import com.google.gson.reflect.TypeToken;
-import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
import java.io.Serializable;
import java.lang.reflect.Type;
import java.util.Collection;
import java.util.List;
import java.util.Map;

+import com.google.gson.Gson;
+import com.google.gson.reflect.TypeToken;
+
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

public class SelectionConstraints implements Serializable {
    private List<Constraints> criteria;

View File

@@ -1,10 +1,11 @@
package eu.dnetlib.dhp.bulktag.community;

-import com.google.gson.Gson;
+import java.io.Serializable;
+
import org.dom4j.Node;

-import java.io.Serializable;
+import com.google.gson.Gson;

/** Created by miriam on 01/08/2018. */
public class ZenodoCommunity implements Serializable {

View File

@@ -1,10 +1,10 @@
package eu.dnetlib.dhp.bulktag.criteria;

-import com.google.gson.*;
import java.lang.reflect.Type;

+import com.google.gson.*;

public class InterfaceAdapter implements JsonSerializer, JsonDeserializer {
    private static final String CLASSNAME = "CLASSNAME";

View File

@@ -1,16 +1,16 @@
package eu.dnetlib.dhp.bulktag.criteria;

-import io.github.classgraph.ClassGraph;
-import io.github.classgraph.ClassInfo;
-import io.github.classgraph.ClassInfoList;
-import io.github.classgraph.ScanResult;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.util.Map;
import java.util.stream.Collectors;

+import io.github.classgraph.ClassGraph;
+import io.github.classgraph.ClassInfo;
+import io.github.classgraph.ClassInfoList;
+import io.github.classgraph.ScanResult;

public class VerbResolver implements Serializable {
    private Map<String, Class<Selection>> map = null; // = new HashMap<>();
    private final ClassGraph classgraph = new ClassGraph();

View File

@@ -1,11 +1,12 @@
package eu.dnetlib.dhp.bulktag;

-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.schema.oaf.Dataset;
-import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
-import eu.dnetlib.dhp.schema.oaf.Publication;
-import eu.dnetlib.dhp.schema.oaf.Software;
+import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.spark.SparkConf;

@@ -18,15 +19,15 @@ import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.schema.oaf.Dataset;
+import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
+import eu.dnetlib.dhp.schema.oaf.Publication;
+import eu.dnetlib.dhp.schema.oaf.Software;

public class BulkTagJobTest {

@@ -34,12 +35,11 @@ public class BulkTagJobTest {
    public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp";

-    public static final String pathMap =
-        "{ \"author\" : \"$['author'][*]['fullname']\","
-            + " \"title\" : \"$['title'][*]['value']\","
-            + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
-            + " \"contributor\" : \"$['contributor'][*]['value']\","
-            + " \"description\" : \"$['description'][*]['value']\"}";
+    public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\","
+        + " \"title\" : \"$['title'][*]['value']\","
+        + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
+        + " \"contributor\" : \"$['contributor'][*]['value']\","
+        + " \"description\" : \"$['description'][*]['value']\"}";

    private static SparkSession spark;
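
The pathMap constant above binds logical field names to JSONPath expressions. A minimal sketch of evaluating the "author" expression with the Jayway JsonPath library used by this module (resultJson is a hypothetical JSON-serialized result record):

    import com.jayway.jsonpath.JsonPath;
    import java.util.List;

    // extract author full names using the "author" expression from pathMap
    List<String> fullnames = JsonPath.read(resultJson, "$['author'][*]['fullname']");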
@@ -97,7 +97,8 @@ public class BulkTagJobTest {
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
-                "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
+                "-sourcePath",
+                getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(),
                "-taggingConf", taggingConf,
                "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
                "-outputPath", workingDir.toString() + "/dataset",
@@ -129,8 +130,8 @@ public class BulkTagJobTest {
    @Test
    public void bulktagBySubjectNoPreviousContextTest() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext")
            .getPath();
        final String pathMap = BulkTagJobTest.pathMap;
        SparkBulkTagJob
            .main(

@@ -139,7 +140,7 @@ public class BulkTagJobTest {
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
                "-sourcePath", sourcePath,
                "-taggingConf", taggingConf,
                "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
                "-outputPath", workingDir.toString() + "/dataset",
                "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
                "-pathMap", pathMap

@@ -225,9 +226,9 @@ public class BulkTagJobTest {
    @Test
    public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception {
        final String sourcePath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance")
            .getPath();
        final String pathMap = BulkTagJobTest.pathMap;
        SparkBulkTagJob
            .main(

@@ -236,7 +237,7 @@ public class BulkTagJobTest {
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
                "-sourcePath", sourcePath,
                "-taggingConf", taggingConf,
                "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
                "-outputPath", workingDir.toString() + "/dataset",
                "-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
                "-pathMap", pathMap

@@ -307,8 +308,8 @@ public class BulkTagJobTest {
    @Test
    public void bulktagByDatasourceTest() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource")
            .getPath();
        SparkBulkTagJob
            .main(
                new String[] {

@@ -379,9 +380,9 @@ public class BulkTagJobTest {
    @Test
    public void bulktagByZenodoCommunityTest() throws Exception {
        final String sourcePath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity")
            .getPath();
        SparkBulkTagJob
            .main(
                new String[] {

@@ -501,8 +502,8 @@ public class BulkTagJobTest {
    @Test
    public void bulktagBySubjectDatasourceTest() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource")
            .getPath();
        SparkBulkTagJob
            .main(
                new String[] {

@@ -726,9 +727,9 @@ public class BulkTagJobTest {
    public void bulktagDatasourcewithConstraintsTest() throws Exception {
        final String sourcePath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints")
            .getPath();
        SparkBulkTagJob
            .main(
                new String[] {

View File

@@ -1,21 +1,23 @@
package eu.dnetlib.dhp.bulktag;

-import com.google.gson.Gson;
-import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
-import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory;
-import eu.dnetlib.dhp.bulktag.community.Constraint;
-import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
-import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.*;
+
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.DocumentException;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.util.*;
+import com.google.gson.Gson;
+
+import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration;
+import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory;
+import eu.dnetlib.dhp.bulktag.community.Constraint;
+import eu.dnetlib.dhp.bulktag.community.SelectionConstraints;
+import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;

/** Created by miriam on 03/08/2018. */
public class CommunityConfigurationFactoryTest {

View File

@@ -103,7 +103,8 @@ public class CountryPropagationJobTest {
        Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count());

        Dataset<String> countryExploded = verificationDs
-            .flatMap((FlatMapFunction<Software, Country>) row -> row.getCountry().iterator(), Encoders.bean(Country.class))
+            .flatMap(
+                (FlatMapFunction<Software, Country>) row -> row.getCountry().iterator(), Encoders.bean(Country.class))
            .map((MapFunction<Country, String>) c -> c.getClassid(), Encoders.STRING());

        Assertions.assertEquals(9, countryExploded.count());
@@ -123,10 +124,10 @@ public class CountryPropagationJobTest {
                country_list
                    .stream()
                    .forEach(
                        c -> prova
                            .add(
                                new Tuple2<>(
                                    row.getId(), c.getClassid())));
                return prova.iterator();
            }, Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

@@ -178,20 +179,20 @@ public class CountryPropagationJobTest {
        Dataset<Tuple2<String, String>> countryExplodedWithCountryclassname = verificationDs
            .flatMap(
                (FlatMapFunction<Software, Tuple2<String, String>>) row -> {
                    List<Tuple2<String, String>> prova = new ArrayList();
                    List<Country> country_list = row.getCountry();
                    country_list
                        .stream()
                        .forEach(
                            c -> prova
                                .add(
                                    new Tuple2<>(
                                        row.getId(),
                                        c.getClassname())));
                    return prova.iterator();
                },
                Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

        countryExplodedWithCountryclassname.show(false);
        Assertions

@@ -239,22 +240,22 @@ public class CountryPropagationJobTest {
        Dataset<Tuple2<String, String>> countryExplodedWithCountryProvenance = verificationDs
            .flatMap(
                (FlatMapFunction<Software, Tuple2<String, String>>) row -> {
                    List<Tuple2<String, String>> prova = new ArrayList();
                    List<Country> country_list = row.getCountry();
                    country_list
                        .stream()
                        .forEach(
                            c -> prova
                                .add(
                                    new Tuple2<>(
                                        row.getId(),
                                        c
                                            .getDataInfo()
                                            .getInferenceprovenance())));
                    return prova.iterator();
                },
                Encoders.tuple(Encoders.STRING(), Encoders.STRING()));

        Assertions
            .assertEquals(

View File

@@ -72,14 +72,15 @@ public class ProjectPropagationJobTest {
    public void NoUpdateTest() throws Exception {
        final String potentialUpdateDate = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates")
            .getPath();
        final String alreadyLinkedPath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
            .getPath();
-        SparkResultToProjectThroughSemRelJob.main(
+        SparkResultToProjectThroughSemRelJob
+            .main(
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
@@ -107,14 +108,15 @@ public class ProjectPropagationJobTest {
    @Test
    public void UpdateTenTest() throws Exception {
        final String potentialUpdatePath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates")
            .getPath();
        final String alreadyLinkedPath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
            .getPath();
-        SparkResultToProjectThroughSemRelJob.main(
+        SparkResultToProjectThroughSemRelJob
+            .main(
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
@@ -143,8 +145,8 @@ public class ProjectPropagationJobTest {
            .assertEquals(
                5,
                verificationDs
-                    .filter((FilterFunction<Relation>) r ->
-                        r.getSource().startsWith("50")
+                    .filter(
+                        (FilterFunction<Relation>) r -> r.getSource().startsWith("50")
                            && r.getTarget().startsWith("40")
                            && r.getRelClass().equals("isProducedBy"))
                    .count());

@@ -152,8 +154,8 @@ public class ProjectPropagationJobTest {
            .assertEquals(
                5,
                verificationDs
-                    .filter((FilterFunction<Relation>) r ->
-                        r.getSource().startsWith("40")
+                    .filter(
+                        (FilterFunction<Relation>) r -> r.getSource().startsWith("40")
                            && r.getTarget().startsWith("50")
                            && r.getRelClass().equals("produces"))
                    .count());
@@ -178,14 +180,15 @@ public class ProjectPropagationJobTest {
    @Test
    public void UpdateMixTest() throws Exception {
        final String potentialUpdatepath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates")
            .getPath();
        final String alreadyLinkedPath = getClass()
            .getResource(
                "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked")
            .getPath();
-        SparkResultToProjectThroughSemRelJob.main(
+        SparkResultToProjectThroughSemRelJob
+            .main(
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
@@ -217,8 +220,8 @@ public class ProjectPropagationJobTest {
            .assertEquals(
                4,
                verificationDs
-                    .filter((FilterFunction<Relation>) r ->
-                        r.getSource().startsWith("50")
+                    .filter(
+                        (FilterFunction<Relation>) r -> r.getSource().startsWith("50")
                            && r.getTarget().startsWith("40")
                            && r.getRelClass().equals("isProducedBy"))
                    .count());

@@ -226,8 +229,8 @@ public class ProjectPropagationJobTest {
            .assertEquals(
                4,
                verificationDs
-                    .filter((FilterFunction<Relation>) r ->
-                        r.getSource().startsWith("40")
+                    .filter(
+                        (FilterFunction<Relation>) r -> r.getSource().startsWith("40")
                            && r.getTarget().startsWith("50")
                            && r.getRelClass().equals("produces"))
                    .count());

View File

@@ -67,9 +67,10 @@ public class ResultToCommunityJobTest {
    @Test
    public void testSparkResultToCommunityFromOrganizationJob() throws Exception {
        final String preparedInfoPath = getClass()
            .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo")
            .getPath();
-        SparkResultToCommunityFromOrganizationJob.main(
+        SparkResultToCommunityFromOrganizationJob
+            .main(
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),

View File

@@ -69,15 +69,17 @@ public class ResultToOrganizationJobTest {
    @Test
    public void NoUpdateTest() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
            .getPath();
        final String datasourceOrganizationPath = getClass()
-            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
-            .getPath();
+            .getResource(
+                "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization")
+            .getPath();
        final String alreadyLinkedPath = getClass()
            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked")
            .getPath();
-        SparkResultToOrganizationFromIstRepoJob.main(
+        SparkResultToOrganizationFromIstRepoJob
+            .main(
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
@@ -109,15 +111,17 @@ public class ResultToOrganizationJobTest {
    @Test
    public void UpdateNoMixTest() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix")
            .getPath();
        final String datasourceOrganizationPath = getClass()
-            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
-            .getPath();
+            .getResource(
+                "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization")
+            .getPath();
        final String alreadyLinkedPath = getClass()
            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked")
            .getPath();
-        SparkResultToOrganizationFromIstRepoJob.main(
+        SparkResultToOrganizationFromIstRepoJob
+            .main(
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),
@@ -172,15 +176,17 @@ public class ResultToOrganizationJobTest {
    @Test
    public void UpdateMixTest() throws Exception {
        final String sourcePath = getClass()
            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix")
            .getPath();
        final String datasourceOrganizationPath = getClass()
-            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
-            .getPath();
+            .getResource(
+                "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization")
+            .getPath();
        final String alreadyLinkedPath = getClass()
            .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked")
            .getPath();
-        SparkResultToOrganizationFromIstRepoJob.main(
+        SparkResultToOrganizationFromIstRepoJob
+            .main(
            new String[] {
                "-isTest", Boolean.TRUE.toString(),
                "-isSparkSessionManaged", Boolean.FALSE.toString(),