From 896de42598a380424ecef82174e9094e9d056456 Mon Sep 17 00:00:00 2001 From: Miriam Baglioni Date: Wed, 20 Nov 2024 17:44:33 +0100 Subject: [PATCH] [CommunityAPI] use of new access point to directly get the organizationCommunityMap and the datasouceCommunityMap for all the communities and subcommunities. To be changed in the propagation code when implemented in the APIs --- .../eu/dnetlib/dhp/api/QueryCommunityAPI.java | 16 +- .../main/java/eu/dnetlib/dhp/api/Utils.java | 237 +++++++++++------- .../dhp/api/model/CommunityEntityMap.java | 9 + .../dhp/api/model/CommunitySummary.java | 15 -- ...ionList.java => EntityIdentifierList.java} | 4 +- .../dhp/api/model/SubCommunitySummary.java | 10 - .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 69 +---- .../PrepareResultCommunitySet.java | 1 + .../PrepareResultCommunitySet.java | 10 +- .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 5 - 10 files changed, 174 insertions(+), 202 deletions(-) delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java rename dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/{OrganizationList.java => EntityIdentifierList.java} (57%) delete mode 100644 dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/SubCommunitySummary.java diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java index 4926587d7..e54f02bf3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/QueryCommunityAPI.java @@ -37,12 +37,6 @@ public class QueryCommunityAPI { } - public static String community(String id, String baseURL) throws IOException { - - return get(baseURL + id); - - } - public static String subcommunities(String communityId, String baseURL) throws IOException { return get(baseURL + communityId + 
"/subcommunities"); @@ -56,6 +50,8 @@ public class QueryCommunityAPI { } + + public static String communityPropagationOrganization(String id, String baseURL) throws IOException { return get(baseURL + id + "/propagationOrganizations"); @@ -68,6 +64,10 @@ public class QueryCommunityAPI { } + public static String propagationOrganizationCommunityMap(String baseURL) throws IOException { + return get(baseURL + "/propagationOrganizationCommunityMap"); + } + @NotNull private static String getBody(HttpURLConnection conn) throws IOException { String body = "{}"; @@ -96,4 +96,8 @@ public class QueryCommunityAPI { public static String subcommunityProjects(String communityId, String subcommunityId, String page, String size, String baseURL) throws IOException { return get(baseURL + communityId + "/subcommunities/projects/" + page + "/" + size + "?subCommunityId=" + subcommunityId); } + + public static String propagationDatasourceCommunityMap(String baseURL) throws IOException { + return get(baseURL + "/propagationDatasourceCommunityMap"); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index f6737a89b..6bcbacf0c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -6,6 +6,7 @@ import java.io.Serializable; import java.util.*; import java.util.stream.Collectors; +import com.fasterxml.jackson.core.type.TypeReference; import org.apache.commons.lang3.StringUtils; import org.jetbrains.annotations.NotNull; @@ -42,7 +43,7 @@ public class Utils implements Serializable { } //PROJECT METHODS - public static CommunityEntityMap getCommunityProjects(String baseURL) throws IOException { + public static CommunityEntityMap getProjectCommunityMap(String baseURL) throws IOException { CommunityEntityMap projectMap = new CommunityEntityMap(); 
getValidCommunities(baseURL) @@ -110,13 +111,8 @@ public class Utils implements Serializable { contentModel = MAPPER.readValue(response, ContentModel.class); if (!contentModel.getContent().isEmpty()) { - contentModel.getContent().forEach(project -> - updateEntityMap( - communityId, - project.getOpenaireId(), - communityEntityMap, - ModelSupport.getIdPrefix(Project.class) - ) + contentModel.getContent().forEach(project ->communityEntityMap.add( + ModelSupport.getIdPrefix(Project.class) + "|" + project.getOpenaireId(), communityId) ); } } catch (IOException e) { @@ -126,7 +122,7 @@ public class Utils implements Serializable { } while (!contentModel.getLast()); } - private static List addRelevantDatasources( + private static List getCommunityContentProviders( DatasourceQueryFunction datasourceQueryFunction ) { try { @@ -151,39 +147,92 @@ public class Utils implements Serializable { } + /** + * Select the communities with status different from hidden + * @param baseURL the base url of the API to be queried + * @return the list of communities in the CommunityModel class + * @throws IOException + */ + public static List getValidCommunities(String baseURL) throws IOException { + List listCommunity = MAPPER + .readValue(QueryCommunityAPI.communities(baseURL), new TypeReference>() { + }); + return listCommunity.stream() + .filter( + community -> !community.getStatus().equals("hidden") && + (community.getType().equals("ri") || community.getType().equals("community"))) + .collect(Collectors.toList()); + } + + /** + * Sets the Community information from the replies of the communityAPIs + * @param baseURL the base url of the API to be queried + * @param communityModel the communityModel as replied by the APIs + * @return the community set with information from the community model and for the content providers + */ + private static Community getCommunity(String baseURL, CommunityModel communityModel) { + Community community = getCommunity(communityModel); + 
community.setProviders(getCommunityContentProviders(()->{ + try { + return QueryCommunityAPI.communityDatasource(community.getId(),baseURL); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + + return community; + } + + /** + * extends the community configuration for the subcommunity by adding the content providers + * @param baseURL + * @param communityId + * @param sc + * @return + */ + private static @NotNull Community getSubCommunityConfiguration(String baseURL, String communityId, SubCommunityModel sc) { + Community c = getCommunity(sc); + c.setProviders(getCommunityContentProviders(()->{ + try { + return QueryCommunityAPI.subcommunityDatasource(communityId, sc.getSubCommunityId(), baseURL); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + + return c; + } + + /** + * Gets all the sub-communities for a given community identifier + * @param communityId + * @param baseURL + * @return + */ + private static List getSubCommunity(String communityId, String baseURL){ + try { + List subcommunities = getSubcommunities(communityId, baseURL); + return subcommunities.stream().map(sc -> + getSubCommunityConfiguration(baseURL, communityId, sc)) + .collect(Collectors.toList()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * prepare the configuration for the communities and sub-communities + * @param baseURL + * @return + * @throws IOException + */ public static CommunityConfiguration getCommunityConfiguration(String baseURL) throws IOException { final Map communities = Maps.newHashMap(); List communityList = getValidCommunities(baseURL); List validCommunities = new ArrayList<>(); - communityList - .forEach(community -> { - try { - CommunityModel cm = MAPPER - .readValue(QueryCommunityAPI.community(community.getId(), baseURL), CommunityModel.class); - validCommunities.add(getCommunity(cm)); - - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - 
validCommunities.forEach(community->community.setProviders(addRelevantDatasources(()->{ - try { - return QueryCommunityAPI.communityDatasource(community.getId(),baseURL); - } catch (IOException e) { - throw new RuntimeException(e); - } - }))); - - //add subcommunities information if any communityList.forEach(community -> { - try { - List subcommunities = getSubcommunities(community.getId(), baseURL); - subcommunities.forEach(sc -> - validCommunities.add(getSubCommunityConfiguration(baseURL, community.getId(), sc))); - } catch (IOException e) { - throw new RuntimeException(e); - } - + validCommunities.add(getCommunity(baseURL, community)); + validCommunities.addAll(getSubCommunity(community.getId(), baseURL)); }); validCommunities.forEach(community -> { @@ -191,23 +240,16 @@ public class Utils implements Serializable { communities.put(community.getId(), community); }); - return new CommunityConfiguration(communities); } - private static @NotNull Community getSubCommunityConfiguration(String baseURL, String communityId, SubCommunityModel sc) { - Community c = getCommunity(sc); - c.setProviders(addRelevantDatasources(()->{ - try { - return QueryCommunityAPI.subcommunityDatasource(communityId, sc.getSubCommunityId(), baseURL); - } catch (IOException e) { - throw new RuntimeException(e); - } - })); - - return c; - } + /** + * fills the common fields in the community model for both the community configuration and the subcommunity configuration + * @param input + * @return + * @param + */ private static Community getCommonConfiguration(C input){ Community c = new Community(); c.setZenodoCommunities(input.getOtherZenodoCommunities()); @@ -241,18 +283,17 @@ public class Utils implements Serializable { return c; } - public static List getValidCommunities(String baseURL) throws IOException { - return MAPPER - .readValue(QueryCommunityAPI.communities(baseURL), CommunitySummary.class) - .stream() - .filter( - community -> !community.getStatus().equals("hidden") && - 
(community.getType().equals("ri") || community.getType().equals("community"))) - .collect(Collectors.toList()); + public static List getSubcommunities(String communityId, String baseURL) throws IOException { + return MAPPER.readValue(QueryCommunityAPI.subcommunities(communityId, baseURL), new TypeReference>() { + }); } - public static List getSubcommunities(String communityId, String baseURL) throws IOException { - return MAPPER.readValue(QueryCommunityAPI.subcommunities(communityId, baseURL), SubCommunitySummary.class); + public static CommunityEntityMap getOrganizationCommunityMap(String baseURL) throws IOException { + return MAPPER.readValue(QueryCommunityAPI.propagationOrganizationCommunityMap(baseURL), CommunityEntityMap.class); + } + + public static CommunityEntityMap getDatasourceCommunityMap(String baseURL) throws IOException { + return MAPPER.readValue(QueryCommunityAPI.propagationDatasourceCommunityMap(baseURL), CommunityEntityMap.class); } private static void getRelatedOrganizations(String communityId, String baseURL, CommunityEntityMap communityEntityMap){ @@ -260,8 +301,8 @@ public class Utils implements Serializable { try { List associatedOrgs = MAPPER .readValue( - QueryCommunityAPI.communityPropagationOrganization(communityId, baseURL), OrganizationList.class); - associatedOrgs.forEach(o -> updateEntityMap(communityId, o, communityEntityMap, ModelSupport.getIdPrefix(Organization.class))); + QueryCommunityAPI.communityPropagationOrganization(communityId, baseURL), EntityIdentifierList.class); + associatedOrgs.forEach(o -> communityEntityMap.add(ModelSupport.getIdPrefix(Organization.class) + "|" + o, communityId )); } catch (IOException e) { throw new RuntimeException(e); } @@ -273,24 +314,14 @@ public class Utils implements Serializable { try { List associatedOrgs = MAPPER .readValue( - QueryCommunityAPI.subcommunityPropagationOrganization(communityId, subcommunityId, baseURL), OrganizationList.class); - associatedOrgs.forEach(o -> 
updateEntityMap(communityId, o, communityEntityMap, ModelSupport.getIdPrefix(Organization.class))); + QueryCommunityAPI.subcommunityPropagationOrganization(communityId, subcommunityId, baseURL), EntityIdentifierList.class); + associatedOrgs.forEach(o -> communityEntityMap.add(ModelSupport.getIdPrefix(Organization.class) + "|" + o, communityId )); } catch (IOException e) { throw new RuntimeException(e); } } - private static void updateEntityMap(String communityId, String entityId, CommunityEntityMap communityEntityMap, String entityPrefix){ - - if (!communityEntityMap - .containsKey(entityPrefix + "|" + entityId)) - communityEntityMap.put(entityPrefix + "|" + entityId, new ArrayList<>()); - - communityEntityMap.get(entityPrefix + "|" + entityId).add(communityId); - - } - /** * it returns for each organization the list of associated communities */ @@ -310,6 +341,7 @@ public class Utils implements Serializable { return organizationMap; } + public static List getCommunityIdList(String baseURL) throws IOException { return getValidCommunities(baseURL) .stream() @@ -317,32 +349,45 @@ public class Utils implements Serializable { .collect(Collectors.toList()); } - public static List getDatasourceCommunities(String baseURL) throws IOException { - List validCommunities = getValidCommunities(baseURL); - HashMap> map = new HashMap<>(); - String entityPrefix = ModelSupport.getIdPrefix(Datasource.class) + "|"; + public static CommunityEntityMap getCommunityDatasource(String baseURL) throws IOException { + CommunityEntityMap datasourceMap = new CommunityEntityMap(); - validCommunities.forEach(c -> { - try { - new ObjectMapper() - .readValue(QueryCommunityAPI.communityDatasource(c.getId(), baseURL), DatasourceList.class) - .forEach(d -> { - if (!map.containsKey(d.getOpenaireId())) - map.put(d.getOpenaireId(), new HashSet<>()); + getValidCommunities(baseURL) + .forEach(community -> { + getRelatedDatasource(community.getId(), baseURL, datasourceMap); + try { + List subcommunities = 
getSubcommunities(community.getId(), baseURL); + subcommunities.forEach(sc -> getRelatedDatasource(community.getId(), sc.getSubCommunityId(), baseURL, datasourceMap)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + return datasourceMap; + } - map.get(d.getOpenaireId()).add(c.getId()); - }); - } catch (IOException e) { - throw new RuntimeException(e); - } - }); + private static void getRelatedDatasource(String communityId, String baseURL, CommunityEntityMap communityEntityMap){ - return map - .keySet() - .stream() - .map(k -> EntityCommunities.newInstance(entityPrefix + k, new ArrayList<>(map.get(k)))) - .collect(Collectors.toList()); + try { + List associatedDatasources = MAPPER + .readValue( + QueryCommunityAPI.communityDatasource(communityId, baseURL), EntityIdentifierList.class); + associatedDatasources.forEach(d -> communityEntityMap.add(ModelSupport.getIdPrefix(Datasource.class) + "|" + d, communityId )); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static void getRelatedDatasource(String communityId, String baseURL,String subcommunityId, CommunityEntityMap communityEntityMap){ + + try { + List associatedDatasources = MAPPER + .readValue( + QueryCommunityAPI.subcommunityDatasource(communityId, subcommunityId, baseURL), EntityIdentifierList.class); + associatedDatasources.forEach(d -> communityEntityMap.add(ModelSupport.getIdPrefix(Datasource.class) + "|" + d, communityId )); + } catch (IOException e) { + throw new RuntimeException(e); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java index ca3eb2857..ce7a59b09 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunityEntityMap.java @@ -18,4 +18,13 @@ public class 
CommunityEntityMap extends HashMap> { } return super.get(key); } + + public void add(String key, String value){ + if(!super.containsKey(key)){ + super.put(key, new ArrayList<>()); + } + super.get(key).add(value); + } + + } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java deleted file mode 100644 index a0541f7ee..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/CommunitySummary.java +++ /dev/null @@ -1,15 +0,0 @@ - -package eu.dnetlib.dhp.api.model; - -import java.io.Serializable; -import java.util.ArrayList; - -/** - * @author miriam.baglioni - * @Date 06/10/23 - */ -public class CommunitySummary extends ArrayList implements Serializable { - public CommunitySummary() { - super(); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/EntityIdentifierList.java similarity index 57% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/EntityIdentifierList.java index 3c81ad179..31f284289 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/OrganizationList.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/EntityIdentifierList.java @@ -8,9 +8,9 @@ import java.util.ArrayList; * @author miriam.baglioni * @Date 09/10/23 */ -public class OrganizationList extends ArrayList implements Serializable { +public class EntityIdentifierList extends ArrayList implements Serializable { - public OrganizationList() { + public EntityIdentifierList() { super(); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/SubCommunitySummary.java 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/SubCommunitySummary.java deleted file mode 100644 index 83715d0da..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/model/SubCommunitySummary.java +++ /dev/null @@ -1,10 +0,0 @@ -package eu.dnetlib.dhp.api.model; - -import java.io.Serializable; -import java.util.ArrayList; - -public class SubCommunitySummary extends ArrayList implements Serializable { - public SubCommunitySummary(){ - super(); - } -} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 6f1541afb..b0f16920a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -113,12 +113,19 @@ public class SparkBulkTagJob { spark, inputPath, outputPath, protoMap, cc); execEntityTag( spark, inputPath + "organization", outputPath + "organization", - Utils.getCommunityOrganization(baseURL), Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION, + Utils.getCommunityOrganization(baseURL), + //Utils.getOrganizationCommunityMap(baseURL), + Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION, TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION); execEntityTag( - spark, inputPath + "project", outputPath + "project", Utils.getCommunityProjects(baseURL), + spark, inputPath + "project", outputPath + "project", + Utils.getProjectCommunityMap(baseURL), Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT); - execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL)); + execEntityTag( + spark, inputPath + "datasource", outputPath + "datasource", + Utils.getCommunityDatasource(baseURL), + //Utils.getDatasourceCommunityMap(baseURL), + Datasource.class, 
TaggingConstants.CLASS_ID_DATASOURCE, TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE); }); } @@ -184,62 +191,6 @@ public class SparkBulkTagJob { .json(inputPath); } - private static void execDatasourceTag(SparkSession spark, String inputPath, String outputPath, - List datasourceCommunities) { - Dataset datasource = readPath(spark, inputPath + "datasource", Datasource.class); - - Dataset dc = spark - .createDataset(datasourceCommunities, Encoders.bean(EntityCommunities.class)); - - datasource - .joinWith(dc, datasource.col("id").equalTo(dc.col("entityId")), "left") - .map((MapFunction, Datasource>) t2 -> { - Datasource ds = t2._1(); - if (t2._2() != null) { - - List context = Optional - .ofNullable(ds.getContext()) - .map(v -> v.stream().map(c -> c.getId()).collect(Collectors.toList())) - .orElse(new ArrayList<>()); - - if (!Optional.ofNullable(ds.getContext()).isPresent()) - ds.setContext(new ArrayList<>()); - - t2._2().getCommunitiesId().forEach(c -> { - if (!context.contains(c)) { - Context con = new Context(); - con.setId(c); - con - .setDataInfo( - Arrays - .asList( - OafMapperUtils - .dataInfo( - false, TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false, - OafMapperUtils - .qualifier( - TaggingConstants.CLASS_ID_DATASOURCE, - TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE, - ModelConstants.DNET_PROVENANCE_ACTIONS, - ModelConstants.DNET_PROVENANCE_ACTIONS), - "1"))); - ds.getContext().add(con); - } - }); - } - return ds; - }, Encoders.bean(Datasource.class)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + "datasource"); - - readPath(spark, outputPath + "datasource", Datasource.class) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(inputPath + "datasource"); - } private static void extendCommunityConfigurationForEOSC(SparkSession spark, String inputPath, CommunityConfiguration cc) { diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index be31cd46c..6b627f466 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -52,6 +52,7 @@ public class PrepareResultCommunitySet { log.info("baseURL: {}", baseURL); final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL); + //final CommunityEntityMap organizationMap = Utils.getOrganizationCommunityMap(baseURL); log.info("organizationMap: {}", new Gson().toJson(organizationMap)); SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index 512dfa9be..67a7c56cf 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -2,13 +2,11 @@ package eu.dnetlib.dhp.resulttocommunityfromproject; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.*; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; @@ -18,16 
+16,10 @@ import org.apache.spark.sql.types.StructType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.google.gson.Gson; - import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.api.model.CommunityEntityMap; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; -import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultOrganizations; import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Relation; -import scala.Tuple2; public class PrepareResultCommunitySet { @@ -55,7 +47,7 @@ public class PrepareResultCommunitySet { final String baseURL = parser.get("baseURL"); log.info("baseURL: {}", baseURL); - final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL); + final CommunityEntityMap projectsMap = Utils.getProjectCommunityMap(baseURL); SparkConf conf = new SparkConf(); diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index 9babc78d5..41bea45dd 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -6,15 +6,11 @@ import static eu.dnetlib.dhp.bulktag.community.TaggingConstants.ZENODO_COMMUNITY import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.Arrays; import java.util.List; import com.fasterxml.jackson.core.JsonProcessingException; -import eu.dnetlib.dhp.api.QueryCommunityAPI; import eu.dnetlib.dhp.api.Utils; -import eu.dnetlib.dhp.api.model.CommunityModel; import eu.dnetlib.dhp.api.model.SubCommunityModel; -import eu.dnetlib.dhp.api.model.SubCommunitySummary; import eu.dnetlib.dhp.bulktag.community.CommunityConfiguration; import org.apache.commons.io.FileUtils; import 
org.apache.commons.io.IOUtils; @@ -25,7 +21,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession;