[Bulk Tag Datasource] fixed issue with verb name and added a new test for the neanias selection on orcid

commit cee7a45b1d
parent 5f9383b2d9
Author: Miriam Baglioni
Date: 2022-11-21 18:10:20 +01:00

9 changed files with 82 additions and 4 deletions


@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
 
-@VerbClass("contains_ignorecase")
+@VerbClass("contains_caseinsentive")
 public class ContainsVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
 
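
For context, a minimal sketch of what the whole class plausibly looks like after the rename; only the annotation line changes in this hunk, so the constructor and the apply signature below are assumptions about the Selection contract, not code from this commit.

	// Sketch only: the param-based constructor and the boolean apply(String)
	// method are assumed from the Selection pattern, not shown in the diff.
	@VerbClass("contains_caseinsentive")
	public class ContainsVerbIgnoreCase implements Selection, Serializable {

		private String param;

		public ContainsVerbIgnoreCase(final String param) {
			this.param = param;
		}

		@Override
		public boolean apply(final String value) {
			// case-insensitive containment check against the configured parameter
			return value.toLowerCase().contains(param.toLowerCase());
		}
	}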


@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
 
-@VerbClass("equals_ignorecase")
+@VerbClass("equals_caseinsentive")
 public class EqualVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
 


@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
 
-@VerbClass("not_contains_ignorecase")
+@VerbClass("not_contains_caseinsentive")
 public class NotContainsVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
 


@@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
 import java.io.Serializable;
 
-@VerbClass("not_equals_ignorecase")
+@VerbClass("not_equals_caseinsentive")
 public class NotEqualVerbIgnoreCase implements Selection, Serializable {
 
 	private String param;
 
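
The same rename is applied to all four case-insensitive verbs (contains, equals, not_contains, not_equals), so the names used in selection criteria have to match the new annotation values. As a rough, self-contained illustration of that coupling, the sketch below wires a stand-in annotation and Selection interface to a tiny verb lookup; these are simplified stand-ins for illustration, not the project's actual resolver classes.

	import java.lang.annotation.ElementType;
	import java.lang.annotation.Retention;
	import java.lang.annotation.RetentionPolicy;
	import java.lang.annotation.Target;
	import java.util.HashMap;
	import java.util.Map;
	import java.util.function.Function;

	public class VerbResolutionSketch {

		// Simplified stand-in for the project's @VerbClass annotation.
		@Retention(RetentionPolicy.RUNTIME)
		@Target(ElementType.TYPE)
		@interface VerbClass {
			String value();
		}

		// Simplified stand-in for the Selection contract.
		interface Selection {
			boolean apply(String value);
		}

		// Example verb: matches when the value equals the parameter, ignoring case.
		@VerbClass("equals_caseinsentive")
		static class EqualsCaseInsensitiveVerb implements Selection {
			private final String param;

			EqualsCaseInsensitiveVerb(String param) {
				this.param = param;
			}

			@Override
			public boolean apply(String value) {
				return param.equalsIgnoreCase(value);
			}
		}

		// Registry from the verb name declared in @VerbClass to a factory for the verb.
		private static final Map<String, Function<String, Selection>> VERBS = new HashMap<>();

		static {
			String name = EqualsCaseInsensitiveVerb.class.getAnnotation(VerbClass.class).value();
			VERBS.put(name, EqualsCaseInsensitiveVerb::new);
		}

		public static void main(String[] args) {
			// A criterion referencing "equals_caseinsentive" resolves only because the
			// annotation value matches; that is what the rename in this commit aligns.
			Selection verb = VERBS.get("equals_caseinsentive").apply("0000-0002-1673-2084");
			System.out.println(verb.apply("0000-0002-1673-2084")); // true
		}
	}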


@@ -769,4 +769,52 @@ public class BulkTagJobTest {
 			.assertEquals(
 				3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
 	}
+
+	@Test
+	void bulktagPublicationwithConstraintsTest() throws Exception {
+		final String sourcePath = getClass()
+			.getResource(
+				"/eu/dnetlib/dhp/bulktag/sample/publication/orcidbulktagfordatasource")
+			.getPath();
+		SparkBulkTagJob
+			.main(
+				new String[] {
+					"-isTest", Boolean.TRUE.toString(),
+					"-isSparkSessionManaged", Boolean.FALSE.toString(),
+					"-sourcePath", sourcePath,
+					"-taggingConf", IOUtils
+						.toString(
+							BulkTagJobTest.class
+								.getResourceAsStream(
+									"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_neanias.xml")),
+					"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
+					"-outputPath", workingDir.toString() + "/publication",
+					"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
+					"-pathMap", pathMap
+				});
+
+		final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
+
+		JavaRDD<Publication> tmp = sc
+			.textFile(workingDir.toString() + "/publication")
+			.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
+
+		Assertions.assertEquals(2, tmp.count());
+
+		org.apache.spark.sql.Dataset<Publication> verificationDataset = spark
+			.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
+
+		verificationDataset.createOrReplaceTempView("dataset");
+
+		String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
+			+ "from dataset "
+			+ "lateral view explode(context) c as MyT "
+			+ "lateral view explode(MyT.datainfo) d as MyD "
+			+ "where MyD.inferenceprovenance = 'bulktagging'";
+
+		org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
+		idExplodeCommunity.show(false);
+
+		Assertions.assertEquals(0, idExplodeCommunity.count());
+	}
 }


@@ -0,0 +1,18 @@
+<communities>
+	<community id="neanias">
+		<subjects/>
+		<datasources>
+			<datasource>
+				<openaireId>openaire____::806360c771262b4d6770e7cdf04b5c5a</openaireId>
+				<selcriteria>{"criteria":[{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0002-7883-0894"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-8608-5743"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-9785-1781"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-9684-8863"}]},{"constraint":[{"verb":"equals_caseinsentive","field":"orcid","value":"0000-0002-1673-2084"}]}]}</selcriteria>
+			</datasource>
+		</datasources>
+		<zenodocommunities>
+			<zenodocommunity>
+				<zenodoid>dimpo</zenodoid>
+				<selcriteria/>
+			</zenodocommunity>
+		</zenodocommunities>
+		<organizations/>
+	</community>
+</communities>
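
The new test feeds this configuration to the tagging job and asserts that zero contexts are added, i.e. none of the ORCID constraints above match the two sample publications under orcidbulktagfordatasource. A hypothetical, self-contained illustration of the comparisons those constraints perform (the real evaluation runs through the bulk-tagging constraint machinery):

	public class OrcidConstraintSketch {

		// "equals": exact comparison, as configured for most ORCIDs above.
		static boolean equalsVerb(String configured, String found) {
			return configured.equals(found);
		}

		// "equals_caseinsentive": case-insensitive comparison, as configured for
		// 0000-0002-1673-2084 (case only matters for a trailing X checksum character).
		static boolean equalsCaseInsensitiveVerb(String configured, String found) {
			return configured.equalsIgnoreCase(found);
		}

		public static void main(String[] args) {
			// Hypothetical ORCID standing in for the values found in the sample publications;
			// since none of them match the configured list, the test expects a count of 0.
			String foundOrcid = "0000-0000-0000-0000";
			System.out.println(equalsVerb("0000-0002-7883-0894", foundOrcid)); // false
			System.out.println(equalsCaseInsensitiveVerb("0000-0002-1673-2084", foundOrcid)); // false
		}
	}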