forked from D-Net/dnet-hadoop
Merge pull request 'advConstraintsInBeta' (#288) from advConstraintsInBeta into master
Reviewed-on: D-Net/dnet-hadoop#288
This commit is contained in:
commit
4a4ca634f0
|
@ -58,9 +58,12 @@ public class MakeTarArchive implements Serializable {
|
|||
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, rename);
|
||||
|
||||
}
|
||||
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit) throws IOException{
|
||||
|
||||
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit)
|
||||
throws IOException {
|
||||
makeTArArchive(fileSystem, inputPath, outputPath, gBperSplit, false);
|
||||
}
|
||||
|
||||
public static void makeTArArchive(FileSystem fileSystem, String inputPath, String outputPath, int gBperSplit,
|
||||
boolean rename)
|
||||
throws IOException {
|
||||
|
|
|
@ -24,7 +24,8 @@ public class Community implements Serializable {
|
|||
public boolean isValid() {
|
||||
return !getSubjects().isEmpty()
|
||||
|| !getProviders().isEmpty()
|
||||
|| !getZenodoCommunities().isEmpty();
|
||||
|| !getZenodoCommunities().isEmpty()
|
||||
|| getConstraints().getCriteria() != null;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
|
|
|
@ -85,9 +85,23 @@ public class CommunityConfigurationFactory {
|
|||
c.setSubjects(parseSubjects(node));
|
||||
c.setProviders(parseDatasources(node));
|
||||
c.setZenodoCommunities(parseZenodoCommunities(node));
|
||||
c.setConstraints(parseConstrains(node));
|
||||
return c;
|
||||
}
|
||||
|
||||
private static SelectionConstraints parseConstrains(Node node) {
|
||||
Node advConstsNode = node.selectSingleNode("./advancedConstraints");
|
||||
if (advConstsNode == null || StringUtils.isBlank(StringUtils.trim(advConstsNode.getText()))) {
|
||||
return new SelectionConstraints();
|
||||
}
|
||||
SelectionConstraints selectionConstraints = new Gson()
|
||||
.fromJson(advConstsNode.getText(), SelectionConstraints.class);
|
||||
|
||||
selectionConstraints.setSelection(resolver);
|
||||
log.info("number of selection constraints set " + selectionConstraints.getCriteria().size());
|
||||
return selectionConstraints;
|
||||
}
|
||||
|
||||
private static List<String> parseSubjects(final Node node) {
|
||||
|
||||
final List<String> subjects = Lists.newArrayList();
|
||||
|
|
|
@ -139,7 +139,7 @@ public class ResultTagger implements Serializable {
|
|||
.getSelectionConstraintsMap()
|
||||
.keySet()
|
||||
.forEach(communityId -> {
|
||||
if (conf.getSelectionConstraintsMap().get(communityId) != null &&
|
||||
if (conf.getSelectionConstraintsMap().get(communityId).getCriteria() != null &&
|
||||
conf
|
||||
.getSelectionConstraintsMap()
|
||||
.get(communityId)
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 06/04/23
|
||||
*/
|
||||
|
||||
@VerbClass("starts_with_caseinsensitive")
|
||||
public class StartsWithIgnoreCaseVerb implements Selection, Serializable {
|
||||
private String param;
|
||||
|
||||
public StartsWithIgnoreCaseVerb() {
|
||||
}
|
||||
|
||||
public StartsWithIgnoreCaseVerb(final String param) {
|
||||
this.param = param;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean apply(String value) {
|
||||
return value.toLowerCase().startsWith(param.toLowerCase());
|
||||
}
|
||||
|
||||
public String getParam() {
|
||||
return param;
|
||||
}
|
||||
|
||||
public void setParam(String param) {
|
||||
this.param = param;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
package eu.dnetlib.dhp.bulktag.criteria;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* @author miriam.baglioni
|
||||
* @Date 06/04/23
|
||||
*/
|
||||
|
||||
@VerbClass("starts_with")
|
||||
public class StartsWithVerb implements Selection, Serializable {
|
||||
private String param;
|
||||
|
||||
public StartsWithVerb() {
|
||||
}
|
||||
|
||||
public StartsWithVerb(final String param) {
|
||||
this.param = param;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean apply(String value) {
|
||||
return value.startsWith(param);
|
||||
}
|
||||
|
||||
public String getParam() {
|
||||
return param;
|
||||
}
|
||||
|
||||
public void setParam(String param) {
|
||||
this.param = param;
|
||||
}
|
||||
}
|
|
@ -39,7 +39,10 @@ public class BulkTagJobTest {
|
|||
+ " \"title\" : \"$['title'][*]['value']\","
|
||||
+ " \"orcid\" : \"$['author'][*]['pid'][*][?(@['key']=='ORCID')]['value']\","
|
||||
+ " \"contributor\" : \"$['contributor'][*]['value']\","
|
||||
+ " \"description\" : \"$['description'][*]['value']\"}";
|
||||
+ " \"description\" : \"$['description'][*]['value']\", "
|
||||
+ " \"subject\" :\"$['subject'][*]['value']\" , " +
|
||||
"\"fos\" : \"$['subject'][?(@['qualifier']['classid']=='FOS')].value\"" +
|
||||
"} ";
|
||||
|
||||
private static SparkSession spark;
|
||||
|
||||
|
@ -763,7 +766,7 @@ public class BulkTagJobTest {
|
|||
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
|
||||
|
||||
idExplodeCommunity.show(false);
|
||||
Assertions.assertEquals(3, idExplodeCommunity.count());
|
||||
Assertions.assertEquals(4, idExplodeCommunity.count());
|
||||
|
||||
Assertions
|
||||
.assertEquals(
|
||||
|
|
|
@ -844,6 +844,88 @@
|
|||
<organizations/>
|
||||
</community>
|
||||
<community id="dariah">
|
||||
<advancedConstraints>
|
||||
{
|
||||
"criteria": [
|
||||
{
|
||||
"constraint": [
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "North America"
|
||||
},
|
||||
{
|
||||
"verb": "contains",
|
||||
"field": "fos",
|
||||
"value": "05"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"constraint": [
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "North America"
|
||||
},
|
||||
{
|
||||
"verb": "contains",
|
||||
"field": "fos",
|
||||
"value": "06"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"constraint": [
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "Mexico"
|
||||
},
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "United States"
|
||||
},
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "Canada"
|
||||
},
|
||||
{
|
||||
"verb": "contains",
|
||||
"field": "fos",
|
||||
"value": "05"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"constraint": [
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "Mexico"
|
||||
},
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "United States"
|
||||
},
|
||||
{
|
||||
"verb": "equals_caseinsensitive",
|
||||
"field": "subject",
|
||||
"value": "Canada"
|
||||
},
|
||||
{
|
||||
"verb": "contains",
|
||||
"field": "fos",
|
||||
"value": "06"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
</advancedConstraints>
|
||||
<subjects/>
|
||||
<datasources>
|
||||
<datasource>
|
||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue