forked from D-Net/dnet-hadoop
This commit is contained in:
parent
5bc97615d5
commit
eaf0a702de
|
@ -14,8 +14,8 @@ import org.jetbrains.annotations.NotNull;
|
||||||
* @Date 06/10/23
|
* @Date 06/10/23
|
||||||
*/
|
*/
|
||||||
public class QueryCommunityAPI {
|
public class QueryCommunityAPI {
|
||||||
private static final String PRODUCTION_BASE_URL = "https://services.openaire.eu/openaire/";
|
|
||||||
private static final String BETA_BASE_URL = "https://beta.services.openaire.eu/openaire/";
|
|
||||||
|
|
||||||
private static String get(String geturl) throws IOException {
|
private static String get(String geturl) throws IOException {
|
||||||
URL url = new URL(geturl);
|
URL url = new URL(geturl);
|
||||||
|
@ -32,35 +32,35 @@ public class QueryCommunityAPI {
|
||||||
return body;
|
return body;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String communities(boolean production) throws IOException {
|
public static String communities(String baseURL) throws IOException {
|
||||||
if (production)
|
|
||||||
return get(PRODUCTION_BASE_URL + "community/communities");
|
|
||||||
return get(BETA_BASE_URL + "community/communities");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String community(String id, boolean production) throws IOException {
|
return get(baseURL + "community/communities");
|
||||||
if (production)
|
|
||||||
return get(PRODUCTION_BASE_URL + "community/" + id);
|
|
||||||
return get(BETA_BASE_URL + "community/" + id);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String communityDatasource(String id, boolean production) throws IOException {
|
|
||||||
if (production)
|
|
||||||
return get(PRODUCTION_BASE_URL + "community/" + id + "/contentproviders");
|
|
||||||
return (BETA_BASE_URL + "community/" + id + "/contentproviders");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String communityPropagationOrganization(String id, boolean production) throws IOException {
|
public static String community(String id, String baseURL ) throws IOException {
|
||||||
if (production)
|
|
||||||
return get(PRODUCTION_BASE_URL + "community/" + id + "/propagationOrganizations");
|
return get(baseURL + "community/" + id);
|
||||||
return get(BETA_BASE_URL + "community/" + id + "/propagationOrganizations");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String communityProjects(String id, String page, String size, boolean production) throws IOException {
|
public static String communityDatasource(String id, String baseURL ) throws IOException {
|
||||||
if (production)
|
|
||||||
return get(PRODUCTION_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
|
return get(baseURL + "community/" + id + "/contentproviders");
|
||||||
return get(BETA_BASE_URL + "community/" + id + "/projects/" + page + "/" + size);
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String communityPropagationOrganization(String id, String baseURL ) throws IOException {
|
||||||
|
|
||||||
|
return get(baseURL + "community/" + id + "/propagationOrganizations");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String communityProjects(String id, String page, String size, String baseURL ) throws IOException {
|
||||||
|
|
||||||
|
return get(baseURL + "community/" + id + "/projects/" + page + "/" + size);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@NotNull
|
@NotNull
|
||||||
|
|
|
@ -36,14 +36,14 @@ public class Utils implements Serializable {
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(Utils.class);
|
private static final Logger log = LoggerFactory.getLogger(Utils.class);
|
||||||
|
|
||||||
public static CommunityConfiguration getCommunityConfiguration(boolean production) throws IOException {
|
public static CommunityConfiguration getCommunityConfiguration(String baseURL) throws IOException {
|
||||||
final Map<String, Community> communities = Maps.newHashMap();
|
final Map<String, Community> communities = Maps.newHashMap();
|
||||||
List<Community> validCommunities = new ArrayList<>();
|
List<Community> validCommunities = new ArrayList<>();
|
||||||
getValidCommunities(production)
|
getValidCommunities(baseURL)
|
||||||
.forEach(community -> {
|
.forEach(community -> {
|
||||||
try {
|
try {
|
||||||
CommunityModel cm = MAPPER
|
CommunityModel cm = MAPPER
|
||||||
.readValue(QueryCommunityAPI.community(community.getId(), production), CommunityModel.class);
|
.readValue(QueryCommunityAPI.community(community.getId(), baseURL), CommunityModel.class);
|
||||||
validCommunities.add(getCommunity(cm));
|
validCommunities.add(getCommunity(cm));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
@ -53,7 +53,7 @@ public class Utils implements Serializable {
|
||||||
try {
|
try {
|
||||||
DatasourceList dl = MAPPER
|
DatasourceList dl = MAPPER
|
||||||
.readValue(
|
.readValue(
|
||||||
QueryCommunityAPI.communityDatasource(community.getId(), production), DatasourceList.class);
|
QueryCommunityAPI.communityDatasource(community.getId(), baseURL), DatasourceList.class);
|
||||||
community.setProviders(dl.stream().map(d -> {
|
community.setProviders(dl.stream().map(d -> {
|
||||||
if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
|
if (d.getEnabled() == null || Boolean.FALSE.equals(d.getEnabled()))
|
||||||
return null;
|
return null;
|
||||||
|
@ -98,9 +98,9 @@ public class Utils implements Serializable {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static List<CommunityModel> getValidCommunities(boolean production) throws IOException {
|
public static List<CommunityModel> getValidCommunities(String baseURL) throws IOException {
|
||||||
return MAPPER
|
return MAPPER
|
||||||
.readValue(QueryCommunityAPI.communities(production), CommunitySummary.class)
|
.readValue(QueryCommunityAPI.communities(baseURL), CommunitySummary.class)
|
||||||
.stream()
|
.stream()
|
||||||
.filter(
|
.filter(
|
||||||
community -> !community.getStatus().equals("hidden") &&
|
community -> !community.getStatus().equals("hidden") &&
|
||||||
|
@ -111,15 +111,15 @@ public class Utils implements Serializable {
|
||||||
/**
|
/**
|
||||||
* it returns for each organization the list of associated communities
|
* it returns for each organization the list of associated communities
|
||||||
*/
|
*/
|
||||||
public static CommunityEntityMap getCommunityOrganization(boolean production) throws IOException {
|
public static CommunityEntityMap getCommunityOrganization(String baseURL) throws IOException {
|
||||||
CommunityEntityMap organizationMap = new CommunityEntityMap();
|
CommunityEntityMap organizationMap = new CommunityEntityMap();
|
||||||
getValidCommunities(production)
|
getValidCommunities(baseURL)
|
||||||
.forEach(community -> {
|
.forEach(community -> {
|
||||||
String id = community.getId();
|
String id = community.getId();
|
||||||
try {
|
try {
|
||||||
List<String> associatedOrgs = MAPPER
|
List<String> associatedOrgs = MAPPER
|
||||||
.readValue(
|
.readValue(
|
||||||
QueryCommunityAPI.communityPropagationOrganization(id, production), OrganizationList.class);
|
QueryCommunityAPI.communityPropagationOrganization(id, baseURL), OrganizationList.class);
|
||||||
associatedOrgs.forEach(o -> {
|
associatedOrgs.forEach(o -> {
|
||||||
if (!organizationMap
|
if (!organizationMap
|
||||||
.keySet()
|
.keySet()
|
||||||
|
@ -136,9 +136,10 @@ public class Utils implements Serializable {
|
||||||
return organizationMap;
|
return organizationMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static CommunityEntityMap getCommunityProjects(boolean production) throws IOException {
|
public static CommunityEntityMap getCommunityProjects(String baseURL) throws IOException {
|
||||||
CommunityEntityMap projectMap = new CommunityEntityMap();
|
CommunityEntityMap projectMap = new CommunityEntityMap();
|
||||||
getValidCommunities(production)
|
|
||||||
|
getValidCommunities(baseURL)
|
||||||
.forEach(community -> {
|
.forEach(community -> {
|
||||||
int page = -1;
|
int page = -1;
|
||||||
int size = 100;
|
int size = 100;
|
||||||
|
@ -150,7 +151,7 @@ public class Utils implements Serializable {
|
||||||
.readValue(
|
.readValue(
|
||||||
QueryCommunityAPI
|
QueryCommunityAPI
|
||||||
.communityProjects(
|
.communityProjects(
|
||||||
community.getId(), String.valueOf(page), String.valueOf(size), production),
|
community.getId(), String.valueOf(page), String.valueOf(size), baseURL),
|
||||||
ContentModel.class);
|
ContentModel.class);
|
||||||
if (cm.getContent().size() > 0) {
|
if (cm.getContent().size() > 0) {
|
||||||
cm.getContent().forEach(p -> {
|
cm.getContent().forEach(p -> {
|
||||||
|
|
|
@ -62,8 +62,8 @@ public class SparkBulkTagJob {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final boolean production = Boolean.valueOf(parser.get("production"));
|
final String baseURL = parser.get("baseURL");
|
||||||
log.info("production: {}", production);
|
log.info("baseURL: {}", baseURL);
|
||||||
|
|
||||||
ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
|
ProtoMap protoMappingParams = new Gson().fromJson(parser.get("pathMap"), ProtoMap.class);
|
||||||
log.info("pathMap: {}", new Gson().toJson(protoMappingParams));
|
log.info("pathMap: {}", new Gson().toJson(protoMappingParams));
|
||||||
|
@ -79,7 +79,7 @@ public class SparkBulkTagJob {
|
||||||
if (taggingConf != null) {
|
if (taggingConf != null) {
|
||||||
cc = CommunityConfigurationFactory.newInstance(taggingConf);
|
cc = CommunityConfigurationFactory.newInstance(taggingConf);
|
||||||
} else {
|
} else {
|
||||||
cc = Utils.getCommunityConfiguration(production);
|
cc = Utils.getCommunityConfiguration(baseURL);
|
||||||
}
|
}
|
||||||
|
|
||||||
runWithSparkSession(
|
runWithSparkSession(
|
||||||
|
|
|
@ -48,10 +48,10 @@ public class PrepareResultCommunitySet {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final boolean production = Boolean.valueOf(parser.get("production"));
|
final String baseURL = parser.get("baseURL");
|
||||||
log.info("production: {}", production);
|
log.info("baseURL: {}", baseURL);
|
||||||
|
|
||||||
final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(production);
|
final CommunityEntityMap organizationMap = Utils.getCommunityOrganization(baseURL);
|
||||||
log.info("organizationMap: {}", new Gson().toJson(organizationMap));
|
log.info("organizationMap: {}", new Gson().toJson(organizationMap));
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
|
|
|
@ -52,11 +52,10 @@ public class PrepareResultCommunitySet {
|
||||||
final String outputPath = parser.get("outputPath");
|
final String outputPath = parser.get("outputPath");
|
||||||
log.info("outputPath: {}", outputPath);
|
log.info("outputPath: {}", outputPath);
|
||||||
|
|
||||||
final boolean production = Boolean.valueOf(parser.get("production"));
|
final String baseURL = parser.get("baseURL");
|
||||||
log.info("production: {}", production);
|
log.info("baseUEL: {}", baseURL);
|
||||||
|
|
||||||
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(production);
|
final CommunityEntityMap projectsMap = Utils.getCommunityProjects(baseURL);
|
||||||
// log.info("projectsMap: {}", new Gson().toJson(projectsMap));
|
|
||||||
|
|
||||||
SparkConf conf = new SparkConf();
|
SparkConf conf = new SparkConf();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue