forked from D-Net/dnet-hadoop
Merge branch 'master' of https://code-repo.d4science.org/D-Net/dnet-hadoop
This commit is contained in:
commit
a34c8b6f81
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
@VerbClass("contains_ignorecase")
|
@VerbClass("contains_caseinsentive")
|
||||||
public class ContainsVerbIgnoreCase implements Selection, Serializable {
|
public class ContainsVerbIgnoreCase implements Selection, Serializable {
|
||||||
|
|
||||||
private String param;
|
private String param;
|
||||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
@VerbClass("equals_ignorecase")
|
@VerbClass("equals_caseinsentive")
|
||||||
public class EqualVerbIgnoreCase implements Selection, Serializable {
|
public class EqualVerbIgnoreCase implements Selection, Serializable {
|
||||||
|
|
||||||
private String param;
|
private String param;
|
||||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
@VerbClass("not_contains_ignorecase")
|
@VerbClass("not_contains_caseinsentive")
|
||||||
public class NotContainsVerbIgnoreCase implements Selection, Serializable {
|
public class NotContainsVerbIgnoreCase implements Selection, Serializable {
|
||||||
|
|
||||||
private String param;
|
private String param;
|
||||||
|
|
|
@ -3,7 +3,7 @@ package eu.dnetlib.dhp.bulktag.criteria;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
@VerbClass("not_equals_ignorecase")
|
@VerbClass("not_equals_caseinsentive")
|
||||||
public class NotEqualVerbIgnoreCase implements Selection, Serializable {
|
public class NotEqualVerbIgnoreCase implements Selection, Serializable {
|
||||||
|
|
||||||
private String param;
|
private String param;
|
||||||
|
|
|
@ -769,4 +769,52 @@ public class BulkTagJobTest {
|
||||||
.assertEquals(
|
.assertEquals(
|
||||||
3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
|
3, idExplodeCommunity.filter("provenance = 'community:datasource'").count());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void bulktagPublicationwithConstraintsTest() throws Exception {
|
||||||
|
|
||||||
|
final String sourcePath = getClass()
|
||||||
|
.getResource(
|
||||||
|
"/eu/dnetlib/dhp/bulktag/sample/publication/orcidbulktagfordatasource")
|
||||||
|
.getPath();
|
||||||
|
SparkBulkTagJob
|
||||||
|
.main(
|
||||||
|
new String[] {
|
||||||
|
"-isTest", Boolean.TRUE.toString(),
|
||||||
|
"-isSparkSessionManaged", Boolean.FALSE.toString(),
|
||||||
|
"-sourcePath", sourcePath,
|
||||||
|
"-taggingConf", IOUtils
|
||||||
|
.toString(
|
||||||
|
BulkTagJobTest.class
|
||||||
|
.getResourceAsStream(
|
||||||
|
"/eu/dnetlib/dhp/bulktag/communityconfiguration/tagging_conf_neanias.xml")),
|
||||||
|
"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Publication",
|
||||||
|
"-outputPath", workingDir.toString() + "/publication",
|
||||||
|
"-isLookUpUrl", MOCK_IS_LOOK_UP_URL,
|
||||||
|
"-pathMap", pathMap
|
||||||
|
});
|
||||||
|
|
||||||
|
final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext());
|
||||||
|
|
||||||
|
JavaRDD<Publication> tmp = sc
|
||||||
|
.textFile(workingDir.toString() + "/publication")
|
||||||
|
.map(item -> OBJECT_MAPPER.readValue(item, Publication.class));
|
||||||
|
|
||||||
|
Assertions.assertEquals(2, tmp.count());
|
||||||
|
org.apache.spark.sql.Dataset<Publication> verificationDataset = spark
|
||||||
|
.createDataset(tmp.rdd(), Encoders.bean(Publication.class));
|
||||||
|
|
||||||
|
verificationDataset.createOrReplaceTempView("dataset");
|
||||||
|
String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name "
|
||||||
|
+ "from dataset "
|
||||||
|
+ "lateral view explode(context) c as MyT "
|
||||||
|
+ "lateral view explode(MyT.datainfo) d as MyD "
|
||||||
|
+ "where MyD.inferenceprovenance = 'bulktagging'";
|
||||||
|
|
||||||
|
org.apache.spark.sql.Dataset<Row> idExplodeCommunity = spark.sql(query);
|
||||||
|
|
||||||
|
idExplodeCommunity.show(false);
|
||||||
|
Assertions.assertEquals(0, idExplodeCommunity.count());
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
<communities>
|
||||||
|
<community id="neanias">
|
||||||
|
<subjects/>
|
||||||
|
<datasources>
|
||||||
|
<datasource>
|
||||||
|
<openaireId>openaire____::806360c771262b4d6770e7cdf04b5c5a</openaireId>
|
||||||
|
<selcriteria>{"criteria":[{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0002-7883-0894"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-8608-5743"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-9785-1781"}]},{"constraint":[{"verb":"equals","field":"orcid","value":"0000-0001-9684-8863"}]},{"constraint":[{"verb":"equals_caseinsentive","field":"orcid","value":"0000-0002-1673-2084"}]}]}</selcriteria>
|
||||||
|
</datasource>
|
||||||
|
</datasources>
|
||||||
|
<zenodocommunities>
|
||||||
|
<zenodocommunity>
|
||||||
|
<zenodoid>dimpo</zenodoid>
|
||||||
|
<selcriteria/>
|
||||||
|
</zenodocommunity>
|
||||||
|
</zenodocommunities>
|
||||||
|
<organizations/>
|
||||||
|
</community>
|
||||||
|
</communities>
|
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue