diff --git a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java index 7b0b9a1e2..fc85b1ac1 100644 --- a/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java +++ b/dhp-schemas/src/main/java/eu/dnetlib/dhp/schema/common/ModelSupport.java @@ -1,3 +1,4 @@ + package eu.dnetlib.dhp.schema.common; import java.util.Map; @@ -65,216 +66,216 @@ public class ModelSupport { static { relationInverseMap - .put( - "personResult_authorship_isAuthorOf", new RelationInverse() - .setRelation("isAuthorOf") - .setInverse("hasAuthor") - .setRelType("personResult") - .setSubReltype("authorship")); + .put( + "personResult_authorship_isAuthorOf", new RelationInverse() + .setRelation("isAuthorOf") + .setInverse("hasAuthor") + .setRelType("personResult") + .setSubReltype("authorship")); relationInverseMap - .put( - "personResult_authorship_hasAuthor", new RelationInverse() - .setInverse("isAuthorOf") - .setRelation("hasAuthor") - .setRelType("personResult") - .setSubReltype("authorship")); + .put( + "personResult_authorship_hasAuthor", new RelationInverse() + .setInverse("isAuthorOf") + .setRelation("hasAuthor") + .setRelType("personResult") + .setSubReltype("authorship")); relationInverseMap - .put( - "projectOrganization_participation_isParticipant", new RelationInverse() - .setRelation("isParticipant") - .setInverse("hasParticipant") - .setRelType("projectOrganization") - .setSubReltype("participation")); + .put( + "projectOrganization_participation_isParticipant", new RelationInverse() + .setRelation("isParticipant") + .setInverse("hasParticipant") + .setRelType("projectOrganization") + .setSubReltype("participation")); relationInverseMap - .put( - "projectOrganization_participation_hasParticipant", new RelationInverse() - .setInverse("isParticipant") - .setRelation("hasParticipant") - .setRelType("projectOrganization") - .setSubReltype("participation")); + .put( + "projectOrganization_participation_hasParticipant", new RelationInverse() + .setInverse("isParticipant") + .setRelation("hasParticipant") + .setRelType("projectOrganization") + .setSubReltype("participation")); relationInverseMap - .put( - "resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse() - .setRelation("hasAuthorInstitution") - .setInverse("isAuthorInstitutionOf") - .setRelType("resultOrganization") - .setSubReltype("affiliation")); + .put( + "resultOrganization_affiliation_hasAuthorInstitution", new RelationInverse() + .setRelation("hasAuthorInstitution") + .setInverse("isAuthorInstitutionOf") + .setRelType("resultOrganization") + .setSubReltype("affiliation")); relationInverseMap - .put( - "resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse() - .setInverse("hasAuthorInstitution") - .setRelation("isAuthorInstitutionOf") - .setRelType("resultOrganization") - .setSubReltype("affiliation")); + .put( + "resultOrganization_affiliation_isAuthorInstitutionOf", new RelationInverse() + .setInverse("hasAuthorInstitution") + .setRelation("isAuthorInstitutionOf") + .setRelType("resultOrganization") + .setSubReltype("affiliation")); relationInverseMap - .put( - "organizationOrganization_dedup_merges", new RelationInverse() - .setRelation("merges") - .setInverse("isMergedIn") - .setRelType("organizationOrganization") - .setSubReltype("dedup")); + .put( + "organizationOrganization_dedup_merges", new RelationInverse() + .setRelation("merges") + .setInverse("isMergedIn") + .setRelType("organizationOrganization") + .setSubReltype("dedup")); relationInverseMap - .put( - "organizationOrganization_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("organizationOrganization") - .setSubReltype("dedup")); + .put( + "organizationOrganization_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("organizationOrganization") + .setSubReltype("dedup")); relationInverseMap - .put( - "organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("organizationOrganization") - .setSubReltype("dedupSimilarity")); + .put( + "organizationOrganization_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("organizationOrganization") + .setSubReltype("dedupSimilarity")); relationInverseMap - .put( - "resultProject_outcome_isProducedBy", new RelationInverse() - .setRelation("isProducedBy") - .setInverse("produces") - .setRelType("resultProject") - .setSubReltype("outcome")); + .put( + "resultProject_outcome_isProducedBy", new RelationInverse() + .setRelation("isProducedBy") + .setInverse("produces") + .setRelType("resultProject") + .setSubReltype("outcome")); relationInverseMap - .put( - "resultProject_outcome_produces", new RelationInverse() - .setInverse("isProducedBy") - .setRelation("produces") - .setRelType("resultProject") - .setSubReltype("outcome")); + .put( + "resultProject_outcome_produces", new RelationInverse() + .setInverse("isProducedBy") + .setRelation("produces") + .setRelType("resultProject") + .setSubReltype("outcome")); relationInverseMap - .put( - "projectPerson_contactPerson_isContact", new RelationInverse() - .setRelation("isContact") - .setInverse("hasContact") - .setRelType("projectPerson") - .setSubReltype("contactPerson")); + .put( + "projectPerson_contactPerson_isContact", new RelationInverse() + .setRelation("isContact") + .setInverse("hasContact") + .setRelType("projectPerson") + .setSubReltype("contactPerson")); relationInverseMap - .put( - "projectPerson_contactPerson_hasContact", new RelationInverse() - .setInverse("isContact") - .setRelation("hasContact") - .setRelType("personPerson") - .setSubReltype("coAuthorship")); + .put( + "projectPerson_contactPerson_hasContact", new RelationInverse() + .setInverse("isContact") + .setRelation("hasContact") + .setRelType("personPerson") + .setSubReltype("coAuthorship")); relationInverseMap - .put( - "personPerson_coAuthorship_isCoauthorOf", new RelationInverse() - .setInverse("isCoAuthorOf") - .setRelation("isCoAuthorOf") - .setRelType("personPerson") - .setSubReltype("coAuthorship")); + .put( + "personPerson_coAuthorship_isCoauthorOf", new RelationInverse() + .setInverse("isCoAuthorOf") + .setRelation("isCoAuthorOf") + .setRelType("personPerson") + .setSubReltype("coAuthorship")); relationInverseMap - .put( - "personPerson_dedup_merges", new RelationInverse() - .setInverse("isMergedIn") - .setRelation("merges") - .setRelType("personPerson") - .setSubReltype("dedup")); + .put( + "personPerson_dedup_merges", new RelationInverse() + .setInverse("isMergedIn") + .setRelation("merges") + .setRelType("personPerson") + .setSubReltype("dedup")); relationInverseMap - .put( - "personPerson_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("personPerson") - .setSubReltype("dedup")); + .put( + "personPerson_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("personPerson") + .setSubReltype("dedup")); relationInverseMap - .put( - "personPerson_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("personPerson") - .setSubReltype("dedupSimilarity")); + .put( + "personPerson_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("personPerson") + .setSubReltype("dedupSimilarity")); relationInverseMap - .put( - "datasourceOrganization_provision_isProvidedBy", new RelationInverse() - .setInverse("provides") - .setRelation("isProvidedBy") - .setRelType("datasourceOrganization") - .setSubReltype("provision")); + .put( + "datasourceOrganization_provision_isProvidedBy", new RelationInverse() + .setInverse("provides") + .setRelation("isProvidedBy") + .setRelType("datasourceOrganization") + .setSubReltype("provision")); relationInverseMap - .put( - "datasourceOrganization_provision_provides", new RelationInverse() - .setInverse("isProvidedBy") - .setRelation("provides") - .setRelType("datasourceOrganization") - .setSubReltype("provision")); + .put( + "datasourceOrganization_provision_provides", new RelationInverse() + .setInverse("isProvidedBy") + .setRelation("provides") + .setRelType("datasourceOrganization") + .setSubReltype("provision")); relationInverseMap - .put( - "resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("isAmongTopNSimilarDocuments") - .setRelation("hasAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); + .put( + "resultResult_similarity_hasAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("isAmongTopNSimilarDocuments") + .setRelation("hasAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); relationInverseMap - .put( - "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("hasAmongTopNSimilarDocuments") - .setRelation("isAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); + .put( + "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("hasAmongTopNSimilarDocuments") + .setRelation("isAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); relationInverseMap - .put( - "resultResult_relationship_isRelatedTo", new RelationInverse() - .setInverse("isRelatedTo") - .setRelation("isRelatedTo") - .setRelType("resultResult") - .setSubReltype("relationship")); + .put( + "resultResult_relationship_isRelatedTo", new RelationInverse() + .setInverse("isRelatedTo") + .setRelation("isRelatedTo") + .setRelType("resultResult") + .setSubReltype("relationship")); relationInverseMap - .put( - "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() - .setInverse("hasAmongTopNSimilarDocuments") - .setRelation("isAmongTopNSimilarDocuments") - .setRelType("resultResult") - .setSubReltype("similarity")); + .put( + "resultResult_similarity_isAmongTopNSimilarDocuments", new RelationInverse() + .setInverse("hasAmongTopNSimilarDocuments") + .setRelation("isAmongTopNSimilarDocuments") + .setRelType("resultResult") + .setSubReltype("similarity")); relationInverseMap - .put( - "resultResult_supplement_isSupplementTo", new RelationInverse() - .setInverse("isSupplementedBy") - .setRelation("isSupplementTo") - .setRelType("resultResult") - .setSubReltype("supplement")); + .put( + "resultResult_supplement_isSupplementTo", new RelationInverse() + .setInverse("isSupplementedBy") + .setRelation("isSupplementTo") + .setRelType("resultResult") + .setSubReltype("supplement")); relationInverseMap - .put( - "resultResult_supplement_isSupplementedBy", new RelationInverse() - .setInverse("isSupplementTo") - .setRelation("isSupplementedBy") - .setRelType("resultResult") - .setSubReltype("supplement")); + .put( + "resultResult_supplement_isSupplementedBy", new RelationInverse() + .setInverse("isSupplementTo") + .setRelation("isSupplementedBy") + .setRelType("resultResult") + .setSubReltype("supplement")); relationInverseMap - .put( - "resultResult_part_isPartOf", new RelationInverse() - .setInverse("hasPart") - .setRelation("isPartOf") - .setRelType("resultResult") - .setSubReltype("part")); + .put( + "resultResult_part_isPartOf", new RelationInverse() + .setInverse("hasPart") + .setRelation("isPartOf") + .setRelType("resultResult") + .setSubReltype("part")); relationInverseMap - .put( - "resultResult_part_hasPart", new RelationInverse() - .setInverse("isPartOf") - .setRelation("hasPart") - .setRelType("resultResult") - .setSubReltype("part")); + .put( + "resultResult_part_hasPart", new RelationInverse() + .setInverse("isPartOf") + .setRelation("hasPart") + .setRelType("resultResult") + .setSubReltype("part")); relationInverseMap - .put( - "resultResult_dedup_merges", new RelationInverse() - .setInverse("isMergedIn") - .setRelation("merges") - .setRelType("resultResult") - .setSubReltype("dedup")); + .put( + "resultResult_dedup_merges", new RelationInverse() + .setInverse("isMergedIn") + .setRelation("merges") + .setRelType("resultResult") + .setSubReltype("dedup")); relationInverseMap - .put( - "resultResult_dedup_isMergedIn", new RelationInverse() - .setInverse("merges") - .setRelation("isMergedIn") - .setRelType("resultResult") - .setSubReltype("dedup")); + .put( + "resultResult_dedup_isMergedIn", new RelationInverse() + .setInverse("merges") + .setRelation("isMergedIn") + .setRelType("resultResult") + .setSubReltype("dedup")); relationInverseMap - .put( - "resultResult_dedupSimilarity_isSimilarTo", new RelationInverse() - .setInverse("isSimilarTo") - .setRelation("isSimilarTo") - .setRelType("resultResult") - .setSubReltype("dedupSimilarity")); + .put( + "resultResult_dedupSimilarity_isSimilarTo", new RelationInverse() + .setInverse("isSimilarTo") + .setRelation("isSimilarTo") + .setRelType("resultResult") + .setSubReltype("dedupSimilarity")); } @@ -293,7 +294,7 @@ public class ModelSupport { * @return True if X is a subclass of Y */ public static Boolean isSubClass( - X subClazzObject, Y superClazzObject) { + X subClazzObject, Y superClazzObject) { return isSubClass(subClazzObject.getClass(), superClazzObject.getClass()); } @@ -307,7 +308,7 @@ public class ModelSupport { * @return True if X is a subclass of Y */ public static Boolean isSubClass( - X subClazzObject, Class superClazz) { + X subClazzObject, Class superClazz) { return isSubClass(subClazzObject.getClass(), superClazz); } @@ -321,7 +322,7 @@ public class ModelSupport { * @return True if X is a subclass of Y */ public static Boolean isSubClass( - Class subClazz, Class superClazz) { + Class subClazz, Class superClazz) { return superClazz.isAssignableFrom(subClazz); } @@ -333,32 +334,32 @@ public class ModelSupport { */ public static Class[] getOafModelClasses() { return new Class[] { - Author.class, - Context.class, - Country.class, - DataInfo.class, - Dataset.class, - Datasource.class, - ExternalReference.class, - ExtraInfo.class, - Field.class, - GeoLocation.class, - Instance.class, - Journal.class, - KeyValue.class, - Oaf.class, - OafEntity.class, - OAIProvenance.class, - Organization.class, - OriginDescription.class, - OtherResearchProduct.class, - Project.class, - Publication.class, - Qualifier.class, - Relation.class, - Result.class, - Software.class, - StructuredProperty.class + Author.class, + Context.class, + Country.class, + DataInfo.class, + Dataset.class, + Datasource.class, + ExternalReference.class, + ExtraInfo.class, + Field.class, + GeoLocation.class, + Instance.class, + Journal.class, + KeyValue.class, + Oaf.class, + OafEntity.class, + OAIProvenance.class, + Organization.class, + OriginDescription.class, + OtherResearchProduct.class, + Project.class, + Publication.class, + Qualifier.class, + Relation.class, + Result.class, + Software.class, + StructuredProperty.class }; } @@ -372,10 +373,10 @@ public class ModelSupport { public static String getScheme(final String sourceType, final String targetType) { return String - .format( - schemeTemplate, - entityMapping.get(EntityType.valueOf(sourceType)).name(), - entityMapping.get(EntityType.valueOf(targetType)).name()); + .format( + schemeTemplate, + entityMapping.get(EntityType.valueOf(sourceType)).name(), + entityMapping.get(EntityType.valueOf(targetType)).name()); } public static Function idFn() { @@ -390,38 +391,38 @@ public class ModelSupport { private static String idFnForRelation(T t) { Relation r = (Relation) t; return Optional - .ofNullable(r.getSource()) - .map( - source -> Optional - .ofNullable(r.getTarget()) - .map( - target -> Optional - .ofNullable(r.getRelType()) - .map( - relType -> Optional - .ofNullable(r.getSubRelType()) - .map( - subRelType -> Optional - .ofNullable(r.getRelClass()) - .map( - relClass -> String - .join( - source, - target, - relType, - subRelType, - relClass)) - .orElse( - String - .join( - source, - target, - relType, - subRelType))) - .orElse(String.join(source, target, relType))) - .orElse(String.join(source, target))) - .orElse(source)) - .orElse(null); + .ofNullable(r.getSource()) + .map( + source -> Optional + .ofNullable(r.getTarget()) + .map( + target -> Optional + .ofNullable(r.getRelType()) + .map( + relType -> Optional + .ofNullable(r.getSubRelType()) + .map( + subRelType -> Optional + .ofNullable(r.getRelClass()) + .map( + relClass -> String + .join( + source, + target, + relType, + subRelType, + relClass)) + .orElse( + String + .join( + source, + target, + relType, + subRelType))) + .orElse(String.join(source, target, relType))) + .orElse(String.join(source, target))) + .orElse(source)) + .orElse(null); } private static String idFnForOafEntity(T t) { diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java index d5c2b518a..0ef59e8c2 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/PrepareMergedRelationJob.java @@ -73,19 +73,6 @@ public class PrepareMergedRelationJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); -// relation.createOrReplaceTempView("relation"); -// -// spark -// .sql( -// "Select * from relation " + -// "where relclass = 'merges' " + -// "and datainfo.deletedbyinference = false") -// .as(Encoders.bean(Relation.class)) -// .toJSON() -// .write() -// .mode(SaveMode.Overwrite) -// .option("compression", "gzip") -// .text(outputPath); } public static org.apache.spark.sql.Dataset readRelations( diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java index 704cab375..2caa66db4 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/ReadBlacklistFromDB.java @@ -65,8 +65,7 @@ public class ReadBlacklistFromDB implements Closeable { } } - public void execute(final String sql, final Function> producer) - throws Exception { + public void execute(final String sql, final Function> producer) throws Exception { final Consumer consumer = rs -> producer.apply(rs).forEach(r -> writeRelation(r)); diff --git a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java index c5104058c..86587bfc9 100644 --- a/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java +++ b/dhp-workflows/dhp-blacklist/src/main/java/eu/dnetlib/dhp/blacklist/SparkRemoveBlacklistedRelationJob.java @@ -84,7 +84,7 @@ public class SparkRemoveBlacklistedRelationJob { .joinWith( mergesRelation, blackListed.col("source").equalTo(mergesRelation.col("target")), "left_outer") - .map(c -> { + .map((MapFunction, Relation>) c -> { Optional .ofNullable(c._2()) .ifPresent(mr -> c._1().setSource(mr.getSource())); @@ -95,7 +95,7 @@ public class SparkRemoveBlacklistedRelationJob { .joinWith( mergesRelation, dedupSource.col("target").equalTo(mergesRelation.col("target")), "left_outer") - .map(c -> { + .map((MapFunction, Relation>) c -> { Optional .ofNullable(c._2()) .ifPresent(mr -> c._1().setTarget(mr.getSource())); @@ -107,7 +107,6 @@ public class SparkRemoveBlacklistedRelationJob { .mode(SaveMode.Overwrite) .json(blacklistPath + "/deduped"); - inputRelation .joinWith( dedupBL, (inputRelation @@ -118,26 +117,23 @@ public class SparkRemoveBlacklistedRelationJob { .col("target") .equalTo(dedupBL.col("target")))), "left_outer") - .map(c -> { - Relation ir = c._1(); - Optional obl = Optional.ofNullable(c._2()); - if (obl.isPresent()) { - if (ir.equals(obl.get())) { - return null; + .map((MapFunction, Relation>) c -> { + Relation ir = c._1(); + Optional obl = Optional.ofNullable(c._2()); + if (obl.isPresent()) { + if (ir.equals(obl.get())) { + return null; + } } - } - return ir; - - }, Encoders.bean(Relation.class)) + return ir; + }, Encoders.bean(Relation.class)) .filter(Objects::nonNull) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); - } - public static org.apache.spark.sql.Dataset readRelations( SparkSession spark, String inputPath) { return spark diff --git a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java index 2d6b1061b..bbfd15674 100644 --- a/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java +++ b/dhp-workflows/dhp-blacklist/src/test/java/eu/dnetlib/dhp/blacklist/BlackListTest.java @@ -19,6 +19,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; + import eu.dnetlib.dhp.schema.oaf.Relation; public class BlackListTest { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index e3d74ef3e..75d85e2ba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -1,11 +1,10 @@ package eu.dnetlib.dhp.bulktag; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.gson.Gson; -import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.bulktag.community.*; -import eu.dnetlib.dhp.schema.oaf.Result; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.util.Optional; + import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; @@ -16,9 +15,12 @@ import org.apache.spark.sql.SparkSession; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.Optional; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.gson.Gson; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.bulktag.community.*; +import eu.dnetlib.dhp.schema.oaf.Result; public class SparkBulkTagJob { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java index d492b848e..0f45d3beb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Community.java @@ -1,14 +1,15 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.google.gson.Gson; + /** Created by miriam on 01/08/2018. */ public class Community implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java index 4e5b9fc9f..29ddde15f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfiguration.java @@ -1,15 +1,6 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; -import eu.dnetlib.dhp.bulktag.criteria.Selection; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; @@ -17,6 +8,17 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; +import eu.dnetlib.dhp.bulktag.criteria.Selection; + /** Created by miriam on 02/08/2018. */ public class CommunityConfiguration implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java index dc83497c3..607315f3f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/CommunityConfigurationFactory.java @@ -1,14 +1,11 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; -import eu.dnetlib.dhp.bulktag.criteria.Selection; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -17,10 +14,15 @@ import org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter; +import eu.dnetlib.dhp.bulktag.criteria.Selection; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory; /** Created by miriam on 03/08/2018. */ public class CommunityConfigurationFactory { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java index 8e28a7a5f..e0856ae8f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraint.java @@ -1,12 +1,12 @@ package eu.dnetlib.dhp.bulktag.community; -import eu.dnetlib.dhp.bulktag.criteria.Selection; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; - import java.io.Serializable; import java.lang.reflect.InvocationTargetException; +import eu.dnetlib.dhp.bulktag.criteria.Selection; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; + public class Constraint implements Serializable { private String verb; private String field; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java index eace3bc35..b56dfaaa3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Constraints.java @@ -1,12 +1,6 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import java.io.Serializable; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Type; @@ -14,6 +8,14 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; + +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; + /** Created by miriam on 02/08/2018. */ public class Constraints implements Serializable { private static final Log log = LogFactory.getLog(Constraints.class); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java index 1130a0770..50e1836fa 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Pair.java @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; - import java.io.Serializable; +import com.google.gson.Gson; + /** Created by miriam on 03/08/2018. */ public class Pair implements Serializable { private A fst; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java index c4362610e..b9c37f4dc 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/Provider.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import java.io.Serializable; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dom4j.Node; -import java.io.Serializable; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 01/08/2018. */ public class Provider implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java index 43eb40940..7ec2f916f 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/QueryInformationSystem.java @@ -1,13 +1,15 @@ package eu.dnetlib.dhp.bulktag.community; +import java.util.List; + +import org.dom4j.DocumentException; + import com.google.common.base.Joiner; + import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import org.dom4j.DocumentException; - -import java.util.List; public class QueryInformationSystem { private static final String XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType') " diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java index fd4f5497a..f5a985d15 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ResultTagger.java @@ -1,19 +1,21 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import com.jayway.jsonpath.DocumentContext; -import com.jayway.jsonpath.JsonPath; -import eu.dnetlib.dhp.schema.oaf.*; -import org.apache.commons.lang3.StringUtils; +import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*; +import static eu.dnetlib.dhp.schema.common.ModelConstants.*; import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; import java.util.stream.Stream; -import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.*; -import static eu.dnetlib.dhp.schema.common.ModelConstants.*; +import org.apache.commons.lang3.StringUtils; + +import com.google.gson.Gson; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; + +import eu.dnetlib.dhp.schema.oaf.*; /** Created by miriam on 02/08/2018. */ public class ResultTagger implements Serializable { @@ -50,7 +52,7 @@ public class ResultTagger implements Serializable { } public R enrichContextCriteria( - final R result, final CommunityConfiguration conf, final Map criteria) { + final R result, final CommunityConfiguration conf, final Map criteria) { // } // public Result enrichContextCriteria(final Result result, final CommunityConfiguration diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java index 28674d9ef..71ff61d1b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/SelectionConstraints.java @@ -1,16 +1,17 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; -import com.google.gson.reflect.TypeToken; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; - import java.io.Serializable; import java.lang.reflect.Type; import java.util.Collection; import java.util.List; import java.util.Map; +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; + +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; + public class SelectionConstraints implements Serializable { private List criteria; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java index eb0577ffc..bc6b75fba 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/ZenodoCommunity.java @@ -1,10 +1,11 @@ package eu.dnetlib.dhp.bulktag.community; -import com.google.gson.Gson; +import java.io.Serializable; + import org.dom4j.Node; -import java.io.Serializable; +import com.google.gson.Gson; /** Created by miriam on 01/08/2018. */ public class ZenodoCommunity implements Serializable { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java index 2bc1ab3cf..e9b948b2b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/InterfaceAdapter.java @@ -1,10 +1,10 @@ package eu.dnetlib.dhp.bulktag.criteria; -import com.google.gson.*; - import java.lang.reflect.Type; +import com.google.gson.*; + public class InterfaceAdapter implements JsonSerializer, JsonDeserializer { private static final String CLASSNAME = "CLASSNAME"; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java index 19d6c4615..fe46c6936 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/criteria/VerbResolver.java @@ -1,16 +1,16 @@ package eu.dnetlib.dhp.bulktag.criteria; -import io.github.classgraph.ClassGraph; -import io.github.classgraph.ClassInfo; -import io.github.classgraph.ClassInfoList; -import io.github.classgraph.ScanResult; - import java.io.Serializable; import java.lang.reflect.InvocationTargetException; import java.util.Map; import java.util.stream.Collectors; +import io.github.classgraph.ClassGraph; +import io.github.classgraph.ClassInfo; +import io.github.classgraph.ClassInfoList; +import io.github.classgraph.ScanResult; + public class VerbResolver implements Serializable { private Map> map = null; // = new HashMap<>(); private final ClassGraph classgraph = new ClassGraph(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index f20678f7b..72e0a63fa 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -1,11 +1,12 @@ package eu.dnetlib.dhp.bulktag; -import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.schema.oaf.Dataset; -import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; -import eu.dnetlib.dhp.schema.oaf.Publication; -import eu.dnetlib.dhp.schema.oaf.Software; +import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; @@ -18,15 +19,15 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; +import com.fasterxml.jackson.databind.ObjectMapper; -import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY_INDICATOR; +import eu.dnetlib.dhp.schema.oaf.Dataset; +import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct; +import eu.dnetlib.dhp.schema.oaf.Publication; +import eu.dnetlib.dhp.schema.oaf.Software; public class BulkTagJobTest { @@ -34,12 +35,11 @@ public class BulkTagJobTest { public static final String MOCK_IS_LOOK_UP_URL = "BASEURL:8280/is/services/isLookUp"; - public static final String pathMap = - "{ \"author\" : \"$['author'][*]['fullname']\"," - + " \"title\" : \"$['title'][*]['value']\"," - + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," - + " \"contributor\" : \"$['contributor'][*]['value']\"," - + " \"description\" : \"$['description'][*]['value']\"}"; + public static final String pathMap = "{ \"author\" : \"$['author'][*]['fullname']\"," + + " \"title\" : \"$['title'][*]['value']\"," + + " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\"," + + " \"contributor\" : \"$['contributor'][*]['value']\"," + + " \"description\" : \"$['description'][*]['value']\"}"; private static SparkSession spark; @@ -97,7 +97,8 @@ public class BulkTagJobTest { new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), - "-sourcePath", getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), + "-sourcePath", + getClass().getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/no_updates").getPath(), "-taggingConf", taggingConf, "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/dataset", @@ -129,8 +130,8 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectNoPreviousContextTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") - .getPath(); + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/nocontext") + .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -139,7 +140,7 @@ public class BulkTagJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/dataset", "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, "-pathMap", pathMap @@ -225,9 +226,9 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectPreviousContextNoProvenanceTest() throws Exception { final String sourcePath = getClass() - .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject/contextnoprovenance") + .getPath(); final String pathMap = BulkTagJobTest.pathMap; SparkBulkTagJob .main( @@ -236,7 +237,7 @@ public class BulkTagJobTest { "-isSparkSessionManaged", Boolean.FALSE.toString(), "-sourcePath", sourcePath, "-taggingConf", taggingConf, - "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", + "-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset", "-outputPath", workingDir.toString() + "/dataset", "-isLookUpUrl", MOCK_IS_LOOK_UP_URL, "-pathMap", pathMap @@ -307,8 +308,8 @@ public class BulkTagJobTest { @Test public void bulktagByDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") - .getPath(); + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource") + .getPath(); SparkBulkTagJob .main( new String[] { @@ -379,9 +380,9 @@ public class BulkTagJobTest { @Test public void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() - .getResource( - "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/otherresearchproduct/update_zenodocommunity") + .getPath(); SparkBulkTagJob .main( new String[] { @@ -501,8 +502,8 @@ public class BulkTagJobTest { @Test public void bulktagBySubjectDatasourceTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") - .getPath(); + .getResource("/eu/dnetlib/dhp/bulktag/sample/dataset/update_subject_datasource") + .getPath(); SparkBulkTagJob .main( new String[] { @@ -726,9 +727,9 @@ public class BulkTagJobTest { public void bulktagDatasourcewithConstraintsTest() throws Exception { final String sourcePath = getClass() - .getResource( - "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/bulktag/sample/dataset/update_datasourcewithconstraints") + .getPath(); SparkBulkTagJob .main( new String[] { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java index 056c3345c..aaf670fd7 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/CommunityConfigurationFactoryTest.java @@ -1,21 +1,23 @@ package eu.dnetlib.dhp.bulktag; -import com.google.gson.Gson; -import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; -import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory; -import eu.dnetlib.dhp.bulktag.community.Constraint; -import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; -import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.util.*; + import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.DocumentException; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.*; +import com.google.gson.Gson; + +import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; +import eu.dnetlib.dhp.bulktag.community.CommunityConfigurationFactory; +import eu.dnetlib.dhp.bulktag.community.Constraint; +import eu.dnetlib.dhp.bulktag.community.SelectionConstraints; +import eu.dnetlib.dhp.bulktag.criteria.VerbResolver; /** Created by miriam on 03/08/2018. */ public class CommunityConfigurationFactoryTest { diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java index b62238089..88ad43b6b 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/countrypropagation/CountryPropagationJobTest.java @@ -103,7 +103,8 @@ public class CountryPropagationJobTest { Assertions.assertEquals(0, verificationDs.filter("size(country) > 2").count()); Dataset countryExploded = verificationDs - .flatMap((FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) + .flatMap( + (FlatMapFunction) row -> row.getCountry().iterator(), Encoders.bean(Country.class)) .map((MapFunction) c -> c.getClassid(), Encoders.STRING()); Assertions.assertEquals(9, countryExploded.count()); @@ -123,10 +124,10 @@ public class CountryPropagationJobTest { country_list .stream() .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), c.getClassid()))); + c -> prova + .add( + new Tuple2<>( + row.getId(), c.getClassid()))); return prova.iterator(); }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())); @@ -178,20 +179,20 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryclassname = verificationDs .flatMap( - (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() - .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), - c.getClassname()))); - return prova.iterator(); - }, - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + (FlatMapFunction>) row -> { + List> prova = new ArrayList(); + List country_list = row.getCountry(); + country_list + .stream() + .forEach( + c -> prova + .add( + new Tuple2<>( + row.getId(), + c.getClassname()))); + return prova.iterator(); + }, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); countryExplodedWithCountryclassname.show(false); Assertions @@ -239,22 +240,22 @@ public class CountryPropagationJobTest { Dataset> countryExplodedWithCountryProvenance = verificationDs .flatMap( - (FlatMapFunction>) row -> { - List> prova = new ArrayList(); - List country_list = row.getCountry(); - country_list - .stream() - .forEach( - c -> prova - .add( - new Tuple2<>( - row.getId(), - c - .getDataInfo() - .getInferenceprovenance()))); - return prova.iterator(); - }, - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + (FlatMapFunction>) row -> { + List> prova = new ArrayList(); + List country_list = row.getCountry(); + country_list + .stream() + .forEach( + c -> prova + .add( + new Tuple2<>( + row.getId(), + c + .getDataInfo() + .getInferenceprovenance()))); + return prova.iterator(); + }, + Encoders.tuple(Encoders.STRING(), Encoders.STRING())); Assertions .assertEquals( diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java index ff63753b8..abed028e1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/projecttoresult/ProjectPropagationJobTest.java @@ -72,14 +72,15 @@ public class ProjectPropagationJobTest { public void NoUpdateTest() throws Exception { final String potentialUpdateDate = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/noupdates/potentialUpdates") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob.main( + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -107,14 +108,15 @@ public class ProjectPropagationJobTest { @Test public void UpdateTenTest() throws Exception { final String potentialUpdatePath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/tenupdates/potentialUpdates") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob.main( + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -143,8 +145,8 @@ public class ProjectPropagationJobTest { .assertEquals( 5, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("50") + .filter( + (FilterFunction) r -> r.getSource().startsWith("50") && r.getTarget().startsWith("40") && r.getRelClass().equals("isProducedBy")) .count()); @@ -152,8 +154,8 @@ public class ProjectPropagationJobTest { .assertEquals( 5, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("40") + .filter( + (FilterFunction) r -> r.getSource().startsWith("40") && r.getTarget().startsWith("50") && r.getRelClass().equals("produces")) .count()); @@ -178,14 +180,15 @@ public class ProjectPropagationJobTest { @Test public void UpdateMixTest() throws Exception { final String potentialUpdatepath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/updatesmixed/potentialUpdates") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource( - "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToProjectThroughSemRelJob.main( + .getResource( + "/eu/dnetlib/dhp/projecttoresult/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToProjectThroughSemRelJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -217,8 +220,8 @@ public class ProjectPropagationJobTest { .assertEquals( 4, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("50") + .filter( + (FilterFunction) r -> r.getSource().startsWith("50") && r.getTarget().startsWith("40") && r.getRelClass().equals("isProducedBy")) .count()); @@ -226,8 +229,8 @@ public class ProjectPropagationJobTest { .assertEquals( 4, verificationDs - .filter((FilterFunction) r -> - r.getSource().startsWith("40") + .filter( + (FilterFunction) r -> r.getSource().startsWith("40") && r.getTarget().startsWith("50") && r.getRelClass().equals("produces")) .count()); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java index 20b20d4ed..d739516fc 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttocommunityfromorganization/ResultToCommunityJobTest.java @@ -67,9 +67,10 @@ public class ResultToCommunityJobTest { @Test public void testSparkResultToCommunityFromOrganizationJob() throws Exception { final String preparedInfoPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") - .getPath(); - SparkResultToCommunityFromOrganizationJob.main( + .getResource("/eu/dnetlib/dhp/resulttocommunityfromorganization/preparedInfo") + .getPath(); + SparkResultToCommunityFromOrganizationJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java index 30be118d1..435b76605 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/ResultToOrganizationJobTest.java @@ -69,15 +69,17 @@ public class ResultToOrganizationJobTest { @Test public void NoUpdateTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") - .getPath(); + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") + .getPath(); final String datasourceOrganizationPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/datasourceOrganization") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToOrganizationFromIstRepoJob.main( + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/noupdate/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -109,15 +111,17 @@ public class ResultToOrganizationJobTest { @Test public void UpdateNoMixTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") - .getPath(); + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/noupdate_updatenomix") + .getPath(); final String datasourceOrganizationPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/datasourceOrganization") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToOrganizationFromIstRepoJob.main( + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatenomix/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(), @@ -172,15 +176,17 @@ public class ResultToOrganizationJobTest { @Test public void UpdateMixTest() throws Exception { final String sourcePath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") - .getPath(); + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/sample/updatemix") + .getPath(); final String datasourceOrganizationPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization") - .getPath(); + .getResource( + "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/datasourceOrganization") + .getPath(); final String alreadyLinkedPath = getClass() - .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked") - .getPath(); - SparkResultToOrganizationFromIstRepoJob.main( + .getResource("/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/updatemix/preparedInfo/alreadyLinked") + .getPath(); + SparkResultToOrganizationFromIstRepoJob + .main( new String[] { "-isTest", Boolean.TRUE.toString(), "-isSparkSessionManaged", Boolean.FALSE.toString(),