forked from D-Net/dnet-hadoop
Merge pull request 'advConstraintsInBeta' (#288) from advConstraintsInBeta into master
Reviewed-on: D-Net/dnet-hadoop#288
This commit is contained in:
commit
4a4ca634f0
|
@ -58,9 +58,12 @@ public class MakeTarArchive implements Serializable {
|
||||||
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, rename);
|
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, rename);
|
||||||
|
|
||||||
}
|
}
|
||||||
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit) throws IOException{
|
|
||||||
makeTArArchive(fileSystem,inputPath,outputPath,gBperSplit,false);
|
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
|
||||||
|
throws IOException {
|
||||||
|
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit,
|
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit,
|
||||||
boolean rename)
|
boolean rename)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
|
@ -24,7 +24,8 @@ public class Community implements Serializable {
|
||||||
public boolean isValid() {
|
public boolean isValid() {
|
||||||
return !getSubjects().isEmpty()
|
return !getSubjects().isEmpty()
|
||||||
|| !getProviders().isEmpty()
|
|| !getProviders().isEmpty()
|
||||||
|| !getZenodoCommunities().isEmpty();
|
|| !getZenodoCommunities().isEmpty()
|
||||||
|
|| getConstraints().getCriteria() != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getId() {
|
public String getId() {
|
||||||
|
|
|
@ -85,9 +85,23 @@ public class CommunityConfigurationFactory {
|
||||||
c.setSubjects(parseSubjects(node));
|
c.setSubjects(parseSubjects(node));
|
||||||
c.setProviders(parseDatasources(node));
|
c.setProviders(parseDatasources(node));
|
||||||
c.setZenodoCommunities(parseZenodoCommunities(node));
|
c.setZenodoCommunities(parseZenodoCommunities(node));
|
||||||
|
c.setConstraints(parseConstrains(node));
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static SelectionConstraints parseConstrains(Node node) {
|
||||||
|
Node advConstsNode = node.selectSingleNode("./advancedConstraints");
|
||||||
|
if (advConstsNode == null || StringUtils.isBlank(StringUtils.trim(advConstsNode.getText()))) {
|
||||||
|
return new SelectionConstraints();
|
||||||
|
}
|
||||||
|
SelectionConstraints selectionConstraints = new Gson()
|
||||||
|
.fromJson(advConstsNode.getText(), SelectionConstraints.class);
|
||||||
|
|
||||||
|
selectionConstraints.setSelection(resolver);
|
||||||
|
log.info("number of selection constraints set " + selectionConstraints.getCriteria().size());
|
||||||
|
return selectionConstraints;
|
||||||
|
}
|
||||||
|
|
||||||
private static List<String> parseSubjects(final Node node) {
|
private static List<String> parseSubjects(final Node node) {
|
||||||
|
|
||||||
final List<String> subjects = Lists.newArrayList();
|
final List<String> subjects = Lists.newArrayList();
|
||||||
|
|
|
@ -139,7 +139,7 @@ public class ResultTagger implements Serializable {
|
||||||
.getSelectionConstraintsMap()
|
.getSelectionConstraintsMap()
|
||||||
.keySet()
|
.keySet()
|
||||||
.forEach(communityId -> {
|
.forEach(communityId -> {
|
||||||
if (conf.getSelectionConstraintsMap().get(communityId) != null &&
|
if (conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
|
||||||
conf
|
conf
|
||||||
.getSelectionConstraintsMap()
|
.getSelectionConstraintsMap()
|
||||||
.get(communityId)
|
.get(communityId)
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 06/04/23
|
||||||
|
*/
|
||||||
|
|
||||||
|
@VerbClass("starts_with_caseinsensitive")
|
||||||
|
public class StartsWithIgnoreCaseVerb implements Selection, Serializable {
|
||||||
|
private String param;
|
||||||
|
|
||||||
|
public StartsWithIgnoreCaseVerb() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public StartsWithIgnoreCaseVerb(final String param) {
|
||||||
|
this.param = param;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean apply(String value) {
|
||||||
|
return value.toLowerCase().startsWith(param.toLowerCase());
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getParam() {
|
||||||
|
return param;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setParam(String param) {
|
||||||
|
this.param = param;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author miriam.baglioni
|
||||||
|
* @Date 06/04/23
|
||||||
|
*/
|
||||||
|
|
||||||
|
@VerbClass("starts_with")
|
||||||
|
public class StartsWithVerb implements Selection, Serializable {
|
||||||
|
private String param;
|
||||||
|
|
||||||
|
public StartsWithVerb() {
|
||||||
|
}
|
||||||
|
|
||||||
|
public StartsWithVerb(final String param) {
|
||||||
|
this.param = param;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean apply(String value) {
|
||||||
|
return value.startsWith(param);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getParam() {
|
||||||
|
return param;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setParam(String param) {
|
||||||
|
this.param = param;
|
||||||
|
}
|
||||||
|
}
|
|
@ -39,7 +39,10 @@ public class BulkTagJobTest {
|
||||||
+ " \"title\" : \"$['title'][*]['value']\","
|
+ " \"title\" : \"$['title'][*]['value']\","
|
||||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||||
+ " \"description\" : \"$['description'][*]['value']\"}";
|
+ " \"description\" : \"$['description'][*]['value']\", "
|
||||||
|
+ " \"subject\" :\"$['subject'][*]['value']\" , " +
|
||||||
|
"\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"" +
|
||||||
|
"} ";
|
||||||
|
|
||||||
private static SparkSession spark;
|
private static SparkSession spark;
|
||||||
|
|
||||||
|
@ -763,7 +766,7 @@ public class BulkTagJobTest {
|
||||||
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
|
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
|
||||||
|
|
||||||
idExplodeCommunity.show(false);
|
idExplodeCommunity.show(false);
|
||||||
Assertions.assertEquals(3, idExplodeCommunity.count());
|
Assertions.assertEquals(4, idExplodeCommunity.count());
|
||||||
|
|
||||||
Assertions
|
Assertions
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
|
|
|
@ -844,6 +844,88 @@
|
||||||
<organizations/>
|
<organizations/>
|
||||||
</community>
|
</community>
|
||||||
<community id="dariah">
|
<community id="dariah">
|
||||||
|
<advancedConstraints>
|
||||||
|
{
|
||||||
|
"criteria": [
|
||||||
|
{
|
||||||
|
"constraint": [
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "North America"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "contains",
|
||||||
|
"field": "fos",
|
||||||
|
"value": "05"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"constraint": [
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "North America"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "contains",
|
||||||
|
"field": "fos",
|
||||||
|
"value": "06"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"constraint": [
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "Mexico"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "United States"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "Canada"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "contains",
|
||||||
|
"field": "fos",
|
||||||
|
"value": "05"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"constraint": [
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "Mexico"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "United States"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "equals_caseinsensitive",
|
||||||
|
"field": "subject",
|
||||||
|
"value": "Canada"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"verb": "contains",
|
||||||
|
"field": "fos",
|
||||||
|
"value": "06"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
</advancedConstraints>
|
||||||
<subjects/>
|
<subjects/>
|
||||||
<datasources>
|
<datasources>
|
||||||
<datasource>
|
<datasource>
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue