forked from D-Net/dnet-hadoop
[Bulk Tag Datasource] fixed issue with verb name and added new test for neanias selection for orcid
parent 5f9383b2d9
commit cee7a45b1d
@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 
 import java.io.Serializable;
 
-@VerbClass("contains_ignorecase")
+@VerbClass("contains_caseinsentive")
 public class ContainsVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
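Only the annotation value changes in each of the four verb files below. For orientation, a minimal sketch of the whole class as it presumably stands after this commit; the constructors, the apply semantics, and the accessors are assumptions built on the annotation, class declaration, and param field the hunk shows, not code from this diff:

// Sketch only: the commit touches just the @VerbClass value; the rest of
// this class is assumed from the visible declaration and the param field.
package eu.dnetlib.dhp.bulktag.criteria;

import java.io.Serializable;

@VerbClass("contains_caseinsentive")
public class ContainsVerbIgnoreCase implements Selection, Serializable {

	private String param;

	public ContainsVerbIgnoreCase() {
	}

	public ContainsVerbIgnoreCase(final String param) {
		this.param = param;
	}

	@Override
	public boolean apply(String value) {
		// assumed semantics: lower-case both sides so the match ignores case
		return value.toLowerCase().contains(param.toLowerCase());
	}

	public String getParam() {
		return param;
	}

	public void setParam(String param) {
		this.param = param;
	}
}

The three remaining verbs follow the same pattern, differing only in the annotation value and the comparison performed in apply.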
@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 
 import java.io.Serializable;
 
-@VerbClass("equals_ignorecase")
+@VerbClass("equals_caseinsentive")
 public class EqualVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 
 import java.io.Serializable;
 
-@VerbClass("not_contains_ignorecase")
+@VerbClass("not_contains_caseinsentive")
 public class NotContainsVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 
 import java.io.Serializable;
 
-@VerbClass("not_equals_ignorecase")
+@VerbClass("not_equals_caseinsentive")
 public class NotEqualVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
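Why the rename is a functional fix rather than cosmetics: the verb string in a community configuration (see the selcriteria JSON added below) is matched against the value carried by @VerbClass, so configuration and annotation must agree exactly. A hypothetical sketch of such an annotation-driven lookup, assuming the Selection and VerbClass types from the package above, RUNTIME retention on the annotation, a value() element, and a single-String constructor on each verb; the project's real resolver is not part of this diff:

// Hypothetical, illustrative resolver; names are not the project's.
import java.lang.reflect.Constructor;
import java.util.HashMap;
import java.util.Map;

public class SimpleVerbResolver {

	private final Map<String, Class<? extends Selection>> verbs = new HashMap<>();

	// register an implementation under the name carried by its @VerbClass
	// annotation (assumes RUNTIME retention and a value() element)
	public void register(Class<? extends Selection> clazz) {
		final VerbClass annotation = clazz.getAnnotation(VerbClass.class);
		if (annotation != null) {
			verbs.put(annotation.value(), clazz);
		}
	}

	// build the Selection registered for a verb name, handing the constraint
	// parameter to its assumed single-String constructor
	public Selection getSelectionCriteria(String verb, String param) throws Exception {
		final Class<? extends Selection> clazz = verbs.get(verb);
		if (clazz == null) {
			throw new IllegalArgumentException("unknown verb: " + verb);
		}
		final Constructor<? extends Selection> constructor = clazz.getConstructor(String.class);
		return constructor.newInstance(param);
	}
}

Under this reading, a configuration still saying equals_ignorecase after this commit would simply fail to resolve, which is what the new test configuration below guards against.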
@@ -769,4 +769,52 @@ public class BulkTagJobTest {
 			.assertEquals(
 				3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
 	}
+
+	@Test
+	void bulktagPublicationwithConstraintsTest() throws Exception {
+
+		final String sourcePath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/bulktag/sample/publication/orcidbulktagfordatasource")
+			.getPath();
+		SparkBulkTagJob
+			.main(
+				new String[] {
+					"-isTest", Boolean.TRUE.toString(),
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", sourcePath,
+					"-taggingConf", IOUtils
+						.toString(
+							BulkTagJobTest.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_neanias.xml")),
+					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath", workingDir.toString() + "/publication",
+					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-pathMap", pathMap
+				});
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.textFile(workingDir.toString() + "/publication")
+			.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
+
+		Assertions.assertEquals(2, tmp.count());
+		org.apache.spark.sql.Dataset<Publication> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+
+		verificationDataset.createOrReplaceTempView("dataset");
+		String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+			+ "from dataset "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
+
+		org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
+
+		idExplodeCommunity.show(false);
+		Assertions.assertEquals(0, idExplodeCommunity.count());
+
+	}
 }
@@ -0,0 +1,18 @@
+<communities>
+	<community id="neanias">
+		<subjects/>
+		<datasources>
+			<datasource>
+				<openaireId>openaire____::806360c771262b4d6770e7cdf04b5c5a</openaireId>
+				<selcriteria>{"criteria":[{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0002-7883-0894"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-8608-5743"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-9785-1781"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-9684-8863"}]},{"constraint":[{"verb":"equals_caseinsentive","field":"orcid","value":"0000-0002-1673-2084"}]}]}</selcriteria>
+			</datasource>
+		</datasources>
+		<zenodocommunities>
+			<zenodocommunity>
+				<zenodoid>dimpo</zenodoid>
+				<selcriteria/>
+			</zenodocommunity>
+		</zenodocommunities>
+		<organizations/>
+	</community>
+</communities>
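For reference, a minimal sketch of the shape of that selcriteria JSON as plain Jackson bindings, assuming the conventional reading that the criteria entries are alternatives (OR) and the constraints inside one entry must all hold (AND); the class and field names here are illustrative, not the project's:

// Illustrative bindings for the selcriteria payload embedded above.
import java.util.List;
import com.fasterxml.jackson.databind.ObjectMapper;

class ConstraintSketch {
	public String verb;   // must match a @VerbClass value, e.g. "equals_caseinsentive"
	public String field;  // the path-mapped result field, e.g. "orcid"
	public String value;  // the parameter handed to the verb, e.g. "0000-0002-1673-2084"
}

class ConstraintGroupSketch {
	public List<ConstraintSketch> constraint; // ANDed within one group
}

class SelectionCriteriaSketch {
	public List<ConstraintGroupSketch> criteria; // ORed across groups

	static SelectionCriteriaSketch fromJson(String json) throws Exception {
		return new ObjectMapper().readValue(json, SelectionCriteriaSketch.class);
	}
}

Under this reading, a publication from the listed datasource would presumably be tagged for neanias when its orcid matches one of the first four values exactly, or the fifth value in any casing.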
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long