2020-04-30 11:05:17 +02:00
|
|
|
|
2020-05-11 17:32:06 +02:00
|
|
|
package eu.dnetlib.dhp.bulktag.community;
|
2020-04-30 11:05:17 +02:00
|
|
|
|
2020-05-11 17:38:08 +02:00
|
|
|
import java.io.StringReader;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
2020-03-03 16:38:50 +01:00
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
|
import org.dom4j.Document;
|
|
|
|
import org.dom4j.DocumentException;
|
|
|
|
import org.dom4j.Node;
|
|
|
|
import org.dom4j.io.SAXReader;
|
2021-08-11 12:13:22 +02:00
|
|
|
import org.xml.sax.SAXException;
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-05-11 17:38:08 +02:00
|
|
|
import com.google.common.collect.Lists;
|
|
|
|
import com.google.common.collect.Maps;
|
|
|
|
import com.google.gson.Gson;
|
|
|
|
import com.google.gson.GsonBuilder;
|
|
|
|
|
|
|
|
import eu.dnetlib.dhp.bulktag.criteria.InterfaceAdapter;
|
|
|
|
import eu.dnetlib.dhp.bulktag.criteria.Selection;
|
|
|
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolver;
|
|
|
|
import eu.dnetlib.dhp.bulktag.criteria.VerbResolverFactory;
|
2020-04-30 11:05:17 +02:00
|
|
|
|
2020-04-23 11:48:47 +02:00
|
|
|
/** Created by miriam on 03/08/2018. */
|
2020-03-03 16:38:50 +01:00
|
|
|
public class CommunityConfigurationFactory {
|
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
private static final Log log = LogFactory.getLog(CommunityConfigurationFactory.class);
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2021-05-14 10:58:12 +02:00
|
|
|
private static final VerbResolver resolver = VerbResolverFactory.newInstance();
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
private CommunityConfigurationFactory() {
|
|
|
|
}
|
|
|
|
|
|
|
|
public static CommunityConfiguration newInstance(final String xml) throws DocumentException, SAXException {
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2023-04-05 12:18:39 +02:00
|
|
|
log.info(String.format("parsing community configuration from:\n%s", xml));
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2021-08-11 12:13:22 +02:00
|
|
|
final SAXReader reader = new SAXReader();
|
|
|
|
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
|
|
|
final Document doc = reader.read(new StringReader(xml));
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
final Map<String, Community> communities = Maps.newHashMap();
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
for (final Object o : doc.selectNodes("//community")) {
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
final Node node = (Node) o;
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
final Community community = parseCommunity(node);
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
if (community.isValid()) {
|
|
|
|
communities.put(community.getId(), community);
|
|
|
|
}
|
|
|
|
}
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
log.info(String.format("loaded %s community configuration profiles", communities.size()));
|
2021-05-14 10:58:12 +02:00
|
|
|
log.debug(String.format("loaded community configuration:\n%s", communities));
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
return new CommunityConfiguration(communities);
|
|
|
|
}
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
public static CommunityConfiguration fromJson(final String json) {
|
|
|
|
GsonBuilder builder = new GsonBuilder();
|
|
|
|
builder.registerTypeAdapter(Selection.class, new InterfaceAdapter());
|
|
|
|
Gson gson = builder.create();
|
|
|
|
final CommunityConfiguration conf = gson.fromJson(json, CommunityConfiguration.class);
|
|
|
|
log.info(String.format("loaded %s community configuration profiles", conf.size()));
|
|
|
|
conf.init();
|
|
|
|
log.info("created inverse maps");
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
return conf;
|
|
|
|
}
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
private static Community parseCommunity(final Node node) {
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
final Community c = new Community();
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
c.setId(node.valueOf("./@id"));
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
log.info(String.format("community id: %s", c.getId()));
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
c.setSubjects(parseSubjects(node));
|
2020-05-11 17:32:06 +02:00
|
|
|
c.setProviders(parseDatasources(node));
|
2020-04-30 11:05:17 +02:00
|
|
|
c.setZenodoCommunities(parseZenodoCommunities(node));
|
2022-09-23 16:02:19 +02:00
|
|
|
c.setConstraints(parseConstrains(node));
|
2023-05-24 09:56:23 +02:00
|
|
|
c.setRemoveConstraints(parseRemoveConstrains(node));
|
2020-04-30 11:05:17 +02:00
|
|
|
return c;
|
|
|
|
}
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2022-09-23 16:02:19 +02:00
|
|
|
private static SelectionConstraints parseConstrains(Node node) {
|
2022-12-07 10:38:42 +01:00
|
|
|
Node advConstsNode = node.selectSingleNode("./advancedConstraints");
|
|
|
|
if (advConstsNode == null || StringUtils.isBlank(StringUtils.trim(advConstsNode.getText()))) {
|
2023-04-06 12:22:38 +02:00
|
|
|
return new SelectionConstraints();
|
2022-09-23 16:02:19 +02:00
|
|
|
}
|
2022-09-27 14:55:10 +02:00
|
|
|
SelectionConstraints selectionConstraints = new Gson()
|
2022-12-07 10:38:42 +01:00
|
|
|
.fromJson(advConstsNode.getText(), SelectionConstraints.class);
|
2022-09-23 16:02:19 +02:00
|
|
|
|
|
|
|
selectionConstraints.setSelection(resolver);
|
2023-04-05 12:18:39 +02:00
|
|
|
log.info("number of selection constraints set " + selectionConstraints.getCriteria().size());
|
2022-09-23 16:02:19 +02:00
|
|
|
return selectionConstraints;
|
|
|
|
}
|
|
|
|
|
2023-05-24 09:56:23 +02:00
|
|
|
private static SelectionConstraints parseRemoveConstrains(Node node) {
|
|
|
|
Node constsNode = node.selectSingleNode("./removeConstraints");
|
|
|
|
if (constsNode == null || StringUtils.isBlank(StringUtils.trim(constsNode.getText()))) {
|
|
|
|
return new SelectionConstraints();
|
|
|
|
}
|
|
|
|
SelectionConstraints selectionConstraints = new Gson()
|
2023-05-24 16:20:05 +02:00
|
|
|
.fromJson(constsNode.getText(), SelectionConstraints.class);
|
2023-05-24 09:56:23 +02:00
|
|
|
|
|
|
|
selectionConstraints.setSelection(resolver);
|
|
|
|
log.info("number of selection constraints set " + selectionConstraints.getCriteria().size());
|
|
|
|
return selectionConstraints;
|
|
|
|
}
|
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
private static List<String> parseSubjects(final Node node) {
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
final List<String> subjects = Lists.newArrayList();
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
final List<Node> list = node.selectNodes("./subjects/subject");
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
for (Node n : list) {
|
|
|
|
log.debug("text of the node " + n.getText());
|
|
|
|
subjects.add(StringUtils.trim(n.getText()));
|
|
|
|
}
|
|
|
|
log.info("size of the subject list " + subjects.size());
|
|
|
|
return subjects;
|
|
|
|
}
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-05-11 17:32:06 +02:00
|
|
|
private static List<Provider> parseDatasources(final Node node) {
|
2020-04-30 11:05:17 +02:00
|
|
|
final List<Node> list = node.selectNodes("./datasources/datasource");
|
2020-05-11 17:32:06 +02:00
|
|
|
final List<Provider> providerList = new ArrayList<>();
|
2020-04-30 11:05:17 +02:00
|
|
|
for (Node n : list) {
|
2020-05-11 17:32:06 +02:00
|
|
|
Provider d = new Provider();
|
2020-04-30 11:05:17 +02:00
|
|
|
d.setOpenaireId(n.selectSingleNode("./openaireId").getText());
|
|
|
|
d.setSelCriteria(n.selectSingleNode("./selcriteria"), resolver);
|
2020-05-11 17:32:06 +02:00
|
|
|
providerList.add(d);
|
2020-04-30 11:05:17 +02:00
|
|
|
}
|
2020-05-11 17:32:06 +02:00
|
|
|
log.info("size of the datasource list " + providerList.size());
|
|
|
|
return providerList;
|
2020-04-30 11:05:17 +02:00
|
|
|
}
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2023-10-09 14:26:33 +02:00
|
|
|
private static List<String> parseZenodoCommunities(final Node node) {
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
|
2023-10-09 14:26:33 +02:00
|
|
|
final List<String> zenodoCommunityList = new ArrayList<>();
|
2020-04-30 11:05:17 +02:00
|
|
|
for (Node n : list) {
|
2023-10-09 14:26:33 +02:00
|
|
|
// ZenodoCommunity zc = new ZenodoCommunity();
|
|
|
|
// zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText());
|
|
|
|
// zc.setSelCriteria(n.selectSingleNode("./selcriteria"));
|
2020-03-03 16:38:50 +01:00
|
|
|
|
2023-10-09 14:26:33 +02:00
|
|
|
zenodoCommunityList.add(n.selectSingleNode("./zenodoid").getText());
|
2020-04-30 11:05:17 +02:00
|
|
|
}
|
2020-08-13 18:44:07 +02:00
|
|
|
|
2020-04-30 11:05:17 +02:00
|
|
|
log.info("size of the zenodo community list " + zenodoCommunityList.size());
|
|
|
|
return zenodoCommunityList;
|
|
|
|
}
|
2020-04-23 11:48:47 +02:00
|
|
|
}
|