forked from D-Net/dnet-hadoop
attributes fixes
This commit is contained in:
parent bde59a7c8f
commit 6af3fd16b6
@@ -1,10 +1,5 @@
 package eu.dnetlib.pace.tree;
 
-import com.wcohen.ss.AbstractStringDistance;
-import eu.dnetlib.pace.config.Config;
-import eu.dnetlib.pace.tree.support.AbstractStringComparator;
-import eu.dnetlib.pace.tree.support.ComparatorClass;
-import org.joda.time.DateTime;
 import java.time.DateTimeException;
 import java.time.LocalDate;
 import java.time.Period;
@@ -13,6 +8,14 @@ import java.time.format.DateTimeFormatter;
 import java.util.Locale;
 import java.util.Map;
 
+import org.joda.time.DateTime;
+
+import com.wcohen.ss.AbstractStringDistance;
+
+import eu.dnetlib.pace.config.Config;
+import eu.dnetlib.pace.tree.support.AbstractStringComparator;
+import eu.dnetlib.pace.tree.support.ComparatorClass;
+
 @ComparatorClass("dateRange")
 public class DateRange extends AbstractStringComparator {
 
@@ -48,8 +51,7 @@ public class DateRange extends AbstractStringComparator {
             Period period = Period.between(d1, d2);
 
             return period.getYears() <= YEAR_RANGE ? 1.0 : 0.0;
-        }
-        catch (DateTimeException e) {
+        } catch (DateTimeException e) {
             return -1.0;
         }
 
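For context, the DateRange change above is purely stylistic (the split `}` / `catch` collapsed onto one line); the comparator's contract is visible in the hunk itself: 1.0 when the two dates fall within YEAR_RANGE years of each other, 0.0 when they do not, and -1.0 when a date cannot be parsed. A minimal standalone sketch of that logic, assuming ISO-formatted date strings and an illustrative threshold value (neither detail is taken from dnet-pace-core):

import java.time.DateTimeException;
import java.time.LocalDate;
import java.time.Period;

class DateRangeSketch {

    private static final int YEAR_RANGE = 3; // assumed threshold, for illustration only

    static double compare(String a, String b) {
        try {
            // DateTimeParseException extends DateTimeException, so both parse
            // failures land in the catch block below
            LocalDate d1 = LocalDate.parse(a);
            LocalDate d2 = LocalDate.parse(b);
            Period period = Period.between(d1, d2);
            return period.getYears() <= YEAR_RANGE ? 1.0 : 0.0; // match vs. no match
        } catch (DateTimeException e) {
            return -1.0; // undecidable: at least one date did not parse
        }
    }
}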
@@ -72,14 +72,34 @@ public class ComparatorTest extends AbstractPaceTest {
         CodeMatch codeMatch = new CodeMatch(params);
 
         // names have different codes
-        assertEquals(0.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ir02", conf));
+        assertEquals(
+            0.0,
+            codeMatch
+                .distance(
+                    "physical oceanography at ctd station june 1998 ev02a",
+                    "physical oceanography at ctd station june 1998 ir02", conf));
 
         // names have same code
-        assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998 ev02a", "physical oceanography at ctd station june 1998 ev02a", conf));
+        assertEquals(
+            1.0,
+            codeMatch
+                .distance(
+                    "physical oceanography at ctd station june 1998 ev02a",
+                    "physical oceanography at ctd station june 1998 ev02a", conf));
 
         // code is not in both names
-        assertEquals(-1, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998 ev02a", conf));
-        assertEquals(1.0, codeMatch.distance("physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998", conf));
+        assertEquals(
+            -1,
+            codeMatch
+                .distance(
+                    "physical oceanography at ctd station june 1998",
+                    "physical oceanography at ctd station june 1998 ev02a", conf));
+        assertEquals(
+            1.0,
+            codeMatch
+                .distance(
+                    "physical oceanography at ctd station june 1998", "physical oceanography at ctd station june 1998",
+                    conf));
     }
 
     @Test
@@ -3,8 +3,6 @@ package eu.dnetlib.dhp.actionmanager;
 
 import java.util.Optional;
 
-import eu.dnetlib.dhp.schema.oaf.Instance;
-import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import org.apache.spark.api.java.function.MapFunction;
 import org.apache.spark.sql.Dataset;
 import org.apache.spark.sql.Encoders;
@@ -15,6 +13,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import eu.dnetlib.dhp.application.ArgumentApplicationParser;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.Instance;
+import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
 import eu.dnetlib.dhp.schema.oaf.Subject;
 import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
@@ -1,15 +1,15 @@
 
 package eu.dnetlib.dhp.actionmanager.raid;
 
-import com.fasterxml.jackson.databind.ObjectMapper;
-import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
-import eu.dnetlib.dhp.application.ArgumentApplicationParser;
-import eu.dnetlib.dhp.common.Constants;
-import eu.dnetlib.dhp.common.HdfsSupport;
-import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.*;
-import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
-import eu.dnetlib.dhp.utils.DHPUtils;
+import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID;
+import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME;
+import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
+import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
+import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
@@ -19,30 +19,37 @@ import org.apache.spark.sql.Encoders;
 import org.apache.spark.sql.SparkSession;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
+import eu.dnetlib.dhp.application.ArgumentApplicationParser;
+import eu.dnetlib.dhp.common.Constants;
+import eu.dnetlib.dhp.common.HdfsSupport;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
+import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
+import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;
 
-import java.util.*;
-import java.util.stream.Collectors;
-
-import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_ID;
-import static eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson.OPENAIRE_DATASOURCE_NAME;
-import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
-import static eu.dnetlib.dhp.schema.common.ModelConstants.*;
-import static eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils.*;
-
 public class GenerateRAiDActionSetJob {
 
-    private static final Logger log = LoggerFactory.getLogger(eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class);
+    private static final Logger log = LoggerFactory
+        .getLogger(eu.dnetlib.dhp.actionmanager.raid.GenerateRAiDActionSetJob.class);
 
     private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
 
     private static final List<KeyValue> RAID_COLLECTED_FROM = listKeyValues(
         OPENAIRE_DATASOURCE_ID, OPENAIRE_DATASOURCE_NAME);
 
-    private static final Qualifier RAID_QUALIFIER = qualifier("raid:openaireinference", "raid:openaireinference", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS);
+    private static final Qualifier RAID_QUALIFIER = qualifier("0049", "Research Activity Identifier", DNET_PUBLICATION_RESOURCE, DNET_PUBLICATION_RESOURCE);
+
+    private static final Qualifier RAID_INFERENCE_QUALIFIER = qualifier(
+        "raid:openaireinference", "Inferred by OpenAIRE", DNET_PROVENANCE_ACTIONS, DNET_PROVENANCE_ACTIONS);
 
     private static final DataInfo RAID_DATA_INFO = dataInfo(
-        false, OPENAIRE_DATASOURCE_NAME, true, false, RAID_QUALIFIER, "0.92");
+        false, OPENAIRE_DATASOURCE_NAME, true, false, RAID_INFERENCE_QUALIFIER, "0.92");
 
     public static void main(final String[] args) throws Exception {
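Note on the qualifier change above: what was one constant doing double duty is now two. RAID_QUALIFIER describes what the generated record is (resource type "0049" / "Research Activity Identifier" in the dnet:publication_resource scheme), while the new RAID_INFERENCE_QUALIFIER records how it was obtained ("raid:openaireinference" in dnet:provenance_actions) and is the qualifier RAID_DATA_INFO now references, with the trust value unchanged at 0.92. A simplified side-by-side sketch; the Q record is a stand-in for eu.dnetlib.dhp.schema.oaf.Qualifier, and the scheme strings are assumed to be the usual values behind the DNET_* constants:

// classid, classname, schemeid, schemename, mirroring the qualifier(...) calls in the diff
record Q(String classid, String classname, String schemeid, String schemename) {}

class RaidQualifiers {

    // what the produced record *is*: a Research Activity Identifier product
    static final Q RESOURCE_TYPE = new Q(
        "0049", "Research Activity Identifier",
        "dnet:publication_resource", "dnet:publication_resource");

    // how the record was *obtained*: inferred by OpenAIRE, referenced by the DataInfo
    static final Q INFERENCE_PROVENANCE = new Q(
        "raid:openaireinference", "Inferred by OpenAIRE",
        "dnet:provenance_actions", "dnet:provenance_actions");
}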
@@ -103,59 +110,71 @@ public class GenerateRAiDActionSetJob {
         orp.setId(raidId);
         orp.setCollectedfrom(RAID_COLLECTED_FROM);
         orp.setDataInfo(RAID_DATA_INFO);
-        orp.setResourcetype(RAID_QUALIFIER);
-        orp.setTitle(
-            Collections.singletonList(
-                structuredProperty(
-                    r.getTitle(),
-                    qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE),
-                    RAID_DATA_INFO))
-        );
+        orp
+            .setTitle(
+                Collections
+                    .singletonList(
+                        structuredProperty(
+                            r.getTitle(),
+                            qualifier("main title", "main title", DNET_DATACITE_TITLE, DNET_DATACITE_TITLE),
+                            RAID_DATA_INFO)));
         orp.setDescription(listFields(RAID_DATA_INFO, r.getSummary()));
-        orp.setAuthor(createAuthors(r.getAuthors()));
+        // orp.setAuthor(createAuthors(r.getAuthors()));
         orp.setInstance(Collections.singletonList(eu.dnetlib.dhp.actionmanager.Constants.getInstance(RAID_QUALIFIER)));
-        orp.setSubject(
-            r.getSubjects()
-                .stream()
-                .map(s -> subject(s, qualifier(DNET_SUBJECT_KEYWORD, DNET_SUBJECT_KEYWORD, DNET_SUBJECT_TYPOLOGIES, DNET_SUBJECT_TYPOLOGIES), RAID_DATA_INFO))
-                .collect(Collectors.toList())
-        );
-        orp.setRelevantdate(
-            Arrays.asList(
-                structuredProperty(r.getEndDate(), qualifier("endDate","endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO),
-                structuredProperty(r.getStartDate(), qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE), RAID_DATA_INFO)
-            )
-        );
+        orp
+            .setSubject(
+                r
+                    .getSubjects()
+                    .stream()
+                    .map(
+                        s -> subject(
+                            s,
+                            qualifier(
+                                DNET_SUBJECT_KEYWORD, DNET_SUBJECT_KEYWORD, DNET_SUBJECT_TYPOLOGIES,
+                                DNET_SUBJECT_TYPOLOGIES),
+                            RAID_DATA_INFO))
+                    .collect(Collectors.toList()));
+        orp
+            .setRelevantdate(
+                Arrays
+                    .asList(
+                        structuredProperty(
+                            r.getEndDate(), qualifier("endDate", "endDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE),
+                            RAID_DATA_INFO),
+                        structuredProperty(
+                            r.getStartDate(),
+                            qualifier("startDate", "startDate", DNET_DATACITE_DATE, DNET_DATACITE_DATE),
+                            RAID_DATA_INFO)));
         orp.setLastupdatetimestamp(now.getTime());
-        orp.setDateofcollection(r.getStartDate());
+        orp.setDateofacceptance(field(r.getStartDate(), RAID_DATA_INFO));
 
         res.add(new AtomicAction<>(OtherResearchProduct.class, orp));
 
         for (String resultId : r.getIds()) {
-            Relation rel1 = OafMapperUtils.getRelation(
+            Relation rel1 = OafMapperUtils
+                .getRelation(
                     raidId,
                     resultId,
                     ModelConstants.RESULT_RESULT,
-                    ModelConstants.OUTCOME,
                     PART,
+                    HAS_PART,
                     RAID_COLLECTED_FROM,
                     RAID_DATA_INFO,
                     now.getTime(),
                     null,
-                    null
-            );
-            Relation rel2 = OafMapperUtils.getRelation(
+                    null);
+            Relation rel2 = OafMapperUtils
+                .getRelation(
                     resultId,
                     raidId,
                     ModelConstants.RESULT_RESULT,
-                    ModelConstants.OUTCOME,
+                    PART,
                     IS_PART_OF,
                     RAID_COLLECTED_FROM,
                     RAID_DATA_INFO,
                     now.getTime(),
                     null,
-                    null
-            );
+                    null);
             res.add(new AtomicAction<>(Relation.class, rel1));
             res.add(new AtomicAction<>(Relation.class, rel2));
         }
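Two semantic changes ride along with the reformatting in this hunk: the collection-date setter is replaced by orp.setDateofacceptance(field(r.getStartDate(), RAID_DATA_INFO)), and the raid/result relations switch from the OUTCOME subrelation type to a part-whole pair, raid to result as PART/HAS_PART and result to raid as PART/IS_PART_OF. A minimal sketch of that inverse pair, with plain placeholder types and relation-class strings; the real code uses OafMapperUtils.getRelation and the ModelConstants shown in the diff:

// Placeholder relation type; stands in for eu.dnetlib.dhp.schema.oaf.Relation
record Rel(String source, String target, String subRelType, String relClass) {}

class RaidRelations {

    // One symmetric pair is emitted per result id linked to the RAiD entity.
    // The string values here are illustrative guesses at the constants' contents.
    static Rel[] pair(String raidId, String resultId) {
        return new Rel[] {
            new Rel(raidId, resultId, "part", "HasPart"), // the raid contains the result
            new Rel(resultId, raidId, "part", "IsPartOf") // the result belongs to the raid
        };
    }
}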
@@ -1,2 +1,5 @@
-package eu.dnetlib.dhp.actionmanager.raid.model;public class GenerateRAiDActionSetJob {
+
+package eu.dnetlib.dhp.actionmanager.raid.model;
+
+public class GenerateRAiDActionSetJob {
 }
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.actionmanager.raid.model;
 
 import java.io.Serializable;
@@ -15,8 +16,11 @@ public class RAiDEntity implements Serializable {
     String title;
     String summary;
 
-    public RAiDEntity(){}
-    public RAiDEntity(String raid, List<String> authors, String startDate, String endDate, List<String> subjects, List<String> titles, List<String> ids, String title, String summary) {
+    public RAiDEntity() {
+    }
+
+    public RAiDEntity(String raid, List<String> authors, String startDate, String endDate, List<String> subjects,
+        List<String> titles, List<String> ids, String title, String summary) {
         this.raid = raid;
         this.authors = authors;
         this.startDate = startDate;
@@ -21,7 +21,6 @@ import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import eu.dnetlib.dhp.schema.oaf.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -45,6 +44,7 @@ import eu.dnetlib.dhp.common.Constants;
 import eu.dnetlib.dhp.common.HdfsSupport;
 import eu.dnetlib.dhp.schema.action.AtomicAction;
 import eu.dnetlib.dhp.schema.common.ModelConstants;
+import eu.dnetlib.dhp.schema.oaf.*;
 import eu.dnetlib.dhp.utils.DHPUtils;
 import scala.Tuple2;
 
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.sx.bio.pubmed;
 
 /**
@@ -14,6 +15,7 @@ public class PMAffiliation {
     public PMAffiliation() {
+
     }
 
     public PMAffiliation(String name, PMIdentifier identifier) {
         this.name = name;
         this.identifier = identifier;
@@ -97,5 +97,4 @@ public class PMAuthor implements Serializable {
         this.affiliation = affiliation;
     }
 
-
 }
@@ -1,3 +1,4 @@
+
 package eu.dnetlib.dhp.sx.bio.pubmed;
 
 public class PMIdentifier {
@@ -5,7 +6,6 @@ public class PMIdentifier {
     private String pid;
     private String type;
 
-
     public PMIdentifier(String pid, String type) {
         this.pid = cleanPid(pid);
         this.type = type;
@@ -20,8 +20,6 @@
         <fs>
             <delete path='${raidActionSetPath}'/>
             <mkdir path='${raidActionSetPath}'/>
-            <delete path='${workingDir}'/>
-            <mkdir path='${workingDir}'/>
         </fs>
         <ok to="processRAiDFile"/>
         <error to="Kill"/>
@@ -673,7 +673,6 @@ case object Crossref2Oaf {
     val doi = input.getString(0)
     val rorId = input.getString(1)
 
-
     val pubId = IdentifierFactory.idFromPid("50", "doi", DoiCleaningRule.clean(doi), true)
     val affId = GenerateRorActionSetJob.calculateOpenaireId(rorId)
 
@@ -88,7 +88,6 @@ class PMParser2 {
       a.setIdentifier(new PMIdentifier(id, idType))
     }
 
-
     val affiliation = (author \ "AffiliationInfo" \ "Affiliation").text
     val affiliationId = (author \ "AffiliationInfo" \ "Identifier").text
     val affiliationIdType = (author \ "AffiliationInfo" \ "Identifier" \ "@Source").text
@@ -96,7 +95,9 @@ class PMParser2 {
     if (affiliation != null && affiliation.nonEmpty) {
       val aff = new PMAffiliation()
      aff.setName(affiliation)
-      if(affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty) {
+      if (
+        affiliationId != null && affiliationId.nonEmpty && affiliationIdType != null && affiliationIdType.nonEmpty
+      ) {
         aff.setIdentifier(new PMIdentifier(affiliationId, affiliationIdType))
       }
       a.setAffiliation(aff)
@@ -295,8 +295,20 @@ object PubMedToOaf {
       author.setSurname(a.getLastName)
       author.setFullname(a.getFullName)
       if (a.getIdentifier != null) {
-        author.setPid(List(OafMapperUtils.structuredProperty(a.getIdentifier.getPid,
-          OafMapperUtils.qualifier(a.getIdentifier.getType,a.getIdentifier.getType,ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES), dataInfo)).asJava)
+        author.setPid(
+          List(
+            OafMapperUtils.structuredProperty(
+              a.getIdentifier.getPid,
+              OafMapperUtils.qualifier(
+                a.getIdentifier.getType,
+                a.getIdentifier.getType,
+                ModelConstants.DNET_PID_TYPES,
+                ModelConstants.DNET_PID_TYPES
+              ),
+              dataInfo
+            )
+          ).asJava
+        )
       }
       if (a.getAffiliation != null)
         author.setRawAffiliationString(List(a.getAffiliation.getName).asJava)
@@ -1,11 +1,16 @@
 
 package eu.dnetlib.dhp.actionmanager.raid;
 
-import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest;
-import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
-import eu.dnetlib.dhp.schema.action.AtomicAction;
-import eu.dnetlib.dhp.schema.oaf.Oaf;
-import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
-import eu.dnetlib.dhp.schema.oaf.Relation;
+import static java.nio.file.Files.createTempDirectory;
+
+import static eu.dnetlib.dhp.actionmanager.Constants.OBJECT_MAPPER;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.File;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+
 import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
@@ -20,17 +25,15 @@ import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
+import eu.dnetlib.dhp.actionmanager.opencitations.CreateOpenCitationsASTest;
+import eu.dnetlib.dhp.actionmanager.raid.model.RAiDEntity;
+import eu.dnetlib.dhp.schema.action.AtomicAction;
+import eu.dnetlib.dhp.schema.oaf.Oaf;
+import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
+import eu.dnetlib.dhp.schema.oaf.Relation;
 import scala.Tuple2;
 
-import java.io.File;
-import java.nio.file.Paths;
-import java.util.Arrays;
-import java.util.List;
-
-import static eu.dnetlib.dhp.actionmanager.Constants.OBJECT_MAPPER;
-import static java.nio.file.Files.createTempDirectory;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
 public class GenerateRAiDActionSetJobTest {
     private static String input_path;
     private static String output_path;
@@ -40,7 +43,10 @@ public class GenerateRAiDActionSetJobTest {
     void setUp() throws Exception {
 
         input_path = Paths
-            .get(GenerateRAiDActionSetJobTest.class.getResource("/eu/dnetlib/dhp/actionmanager/raid/raid_example.json").toURI())
+            .get(
+                GenerateRAiDActionSetJobTest.class
+                    .getResource("/eu/dnetlib/dhp/actionmanager/raid/raid_example.json")
+                    .toURI())
             .toFile()
             .getAbsolutePath();
 
@@ -88,17 +94,64 @@ public class GenerateRAiDActionSetJobTest {
     @Test
     void testPrepareRAiD() {
 
-        List<AtomicAction<? extends Oaf>> atomicActions = GenerateRAiDActionSetJob.prepareRAiD(new RAiDEntity(
+        List<AtomicAction<? extends Oaf>> atomicActions = GenerateRAiDActionSetJob
+            .prepareRAiD(
+                new RAiDEntity(
                     "-92190526",
-            Arrays.asList("Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura", "Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume", "Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont", "Maïeul GRUGET", "Cécile Duchêne"),
+                    Arrays
+                        .asList(
+                            "Berli, Justin", "Le Mao, Bérénice", "Guillaume Touya", "Wenclik, Laura",
+                            "Courtial, Azelle", "Muehlenhaus, Ian", "Justin Berli", "Touya, Guillaume",
+                            "Gruget, Maïeul", "Azelle Courtial", "Ian Muhlenhaus", "Maïeul Gruget", "Marion Dumont",
+                            "Maïeul GRUGET", "Cécile Duchêne"),
                     "2021-09-10",
                     "2024-02-16",
-            Arrays.asList("cartography, zoom, pan, desert fog", "Road network", "zooming", "Pan-scalar maps", "pan-scalar map", "Python library", "QGIS", "map design", "landmarks", "Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]", "[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography", "eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency", "General Medicine", "Geography, Planning and Development", "multi-scales", "pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences", "progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design", "cartography, map generalisation, zoom, multi-scale map", "Interactive maps", "Map generalisation", "Earth and Planetary Sciences (miscellaneous)", "Cartographic generalization", "rivers", "Benchmark", "General Environmental Science", "open source", "drawing", "Constraint", "Multi-scale maps"),
-            Arrays.asList("Where do people look at during multi-scale map tasks?", "FogDetector survey raw data", "Collection of cartographic disorientation stories", "Anchorwhat dataset", "BasqueRoads: A Benchmark for Road Network Selection", "Progressive river network selection for pan-scalar maps", "BasqueRoads, a dataset to benchmark road selection algorithms", "Missing the city for buildings? A critical review of pan-scalar map generalization and design in contemporary zoomable maps", "Empirical approach to advance the generalisation of multi-scale maps", "L'Alpe d'Huez: a dataset to benchmark topographic map generalisation", "eye-tracking data from a survey on zooming in a pan-scalar map", "Material of the experiment 'More is Less' from the MapMuxing project", "Cartagen4py, an open source Python library for map generalisation", "L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"),
-            Arrays.asList("50|doi_dedup___::6915135e0aa39f913394513f809ae58a", "50|doi_dedup___::754e3c283639bc6e104c925ff3e34007", "50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0", "50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a", "50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153", "50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a", "50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13", "50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4", "50|doi_dedup___::a9bc4453273b2d02648a5cb453195042", "50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7", "50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5", "50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283", "50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea", "50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"),
+                    Arrays
+                        .asList(
+                            "cartography, zoom, pan, desert fog", "Road network", "zooming", "Pan-scalar maps",
+                            "pan-scalar map", "Python library", "QGIS", "map design", "landmarks",
+                            "Cartes transscalaires", "anchor", "disorientation", "[INFO]Computer Science [cs]",
+                            "[SHS.GEO]Humanities and Social Sciences/Geography", "cognitive cartography",
+                            "eye-tracking", "Computers in Earth Sciences", "Topographic map", "National Mapping Agency",
+                            "General Medicine", "Geography, Planning and Development", "multi-scales",
+                            "pan-scalar maps", "Selection", "cartography", "General Earth and Planetary Sciences",
+                            "progressiveness", "map generalisation", "Eye-tracker", "zoom", "algorithms", "Map Design",
+                            "cartography, map generalisation, zoom, multi-scale map", "Interactive maps",
+                            "Map generalisation", "Earth and Planetary Sciences (miscellaneous)",
+                            "Cartographic generalization", "rivers", "Benchmark", "General Environmental Science",
+                            "open source", "drawing", "Constraint", "Multi-scale maps"),
+                    Arrays
+                        .asList(
+                            "Where do people look at during multi-scale map tasks?", "FogDetector survey raw data",
+                            "Collection of cartographic disorientation stories", "Anchorwhat dataset",
+                            "BasqueRoads: A Benchmark for Road Network Selection",
+                            "Progressive river network selection for pan-scalar maps",
+                            "BasqueRoads, a dataset to benchmark road selection algorithms",
+                            "Missing the city for buildings? A critical review of pan-scalar map generalization and design in contemporary zoomable maps",
+                            "Empirical approach to advance the generalisation of multi-scale maps",
+                            "L'Alpe d'Huez: a dataset to benchmark topographic map generalisation",
+                            "eye-tracking data from a survey on zooming in a pan-scalar map",
+                            "Material of the experiment 'More is Less' from the MapMuxing project",
+                            "Cartagen4py, an open source Python library for map generalisation",
+                            "L’Alpe d’Huez: A Benchmark for Topographic Map Generalisation"),
+                    Arrays
+                        .asList(
+                            "50|doi_dedup___::6915135e0aa39f913394513f809ae58a",
+                            "50|doi_dedup___::754e3c283639bc6e104c925ff3e34007",
+                            "50|doi_dedup___::13517477f3c1261d57a3364363ce6ce0",
+                            "50|doi_dedup___::675b16c73accc4e7242bbb4ed9b3724a",
+                            "50|doi_dedup___::94ce09906b2d7d37eb2206cea8a50153",
+                            "50|dedup_wf_002::cc575d5ca5651ff8c3029a3a76e7e70a",
+                            "50|doi_dedup___::c5e52baddda17c755d1bae012a97dc13",
+                            "50|doi_dedup___::4f5f38c9e08fe995f7278963183f8ad4",
+                            "50|doi_dedup___::a9bc4453273b2d02648a5cb453195042",
+                            "50|doi_dedup___::5e893dc0cb7624a33f41c9b428bd59f7",
+                            "50|doi_dedup___::c1ecdef48fd9be811a291deed950e1c5",
+                            "50|doi_dedup___::9e93c8f2d97c35de8a6a57a5b53ef283",
+                            "50|dedup_wf_002::d08be0ed27b13d8a880e891e08d093ea",
+                            "50|doi_dedup___::f8d8b3b9eddeca2fc0e3bc9e63996555"),
                     "Exploring Multi-Scale Map Generalization and Design",
-            "This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval."
-        ));
+                    "This project aims to advance the generalization of multi-scale maps by investigating the impact of different design elements on user experience. The research involves collecting and analyzing data from various sources, including surveys, eye-tracking studies, and user experiments. The goal is to identify best practices for map generalization and design, with a focus on reducing disorientation and improving information retrieval during exploration. The project has led to the development of several datasets, including BasqueRoads, AnchorWhat, and L'Alpe d'Huez, which can be used to benchmark road selection algorithms and topographic map generalization techniques. The research has also resulted in the creation of a Python library, Cartagen4py, for map generalization. The findings of this project have the potential to improve the design and usability of multi-scale maps, making them more effective tools for navigation and information retrieval."));
 
         OtherResearchProduct orp = (OtherResearchProduct) atomicActions.get(0).getPayload();
         Relation rel = (Relation) atomicActions.get(1).getPayload();
@@ -63,7 +63,6 @@ class BioScholixTest extends AbstractVocabularyTest {
       "0000000333457333",
      "0000000335964515",
       "0000000302921949",
-
       "http://orcid.org/0000-0001-8567-3543",
       "http://orcid.org/0000-0001-7868-8528",
       "0000-0001-9189-1440",
@@ -915,7 +915,8 @@ class MappersTest {
 
     @Test
     void testODFRecord_guidelines4() throws IOException {
-        final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml")));
+        final String xml = IOUtils
+            .toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_guidelines4.xml")));
         final List<Oaf> list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml);
 
         final Publication p = (Publication) list.get(0);
@@ -5,7 +5,6 @@ import java.io.StringReader;
 import java.util.*;
 import java.util.stream.Collectors;
 
-import eu.dnetlib.dhp.schema.solr.PersonTopic;
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
@@ -40,6 +39,7 @@ import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
 import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
 import eu.dnetlib.dhp.schema.solr.Organization;
 import eu.dnetlib.dhp.schema.solr.Person;
+import eu.dnetlib.dhp.schema.solr.PersonTopic;
 import eu.dnetlib.dhp.schema.solr.Pid;
 import eu.dnetlib.dhp.schema.solr.Project;
 import eu.dnetlib.dhp.schema.solr.Result;
@@ -216,8 +216,11 @@ public class ProvisionModelSupport {
     }
 
     private static List<PersonTopic> mapPersonTopics(List<eu.dnetlib.dhp.schema.oaf.PersonTopic> subjects) {
-        return Optional.ofNullable(subjects)
-            .map(ss -> ss.stream()
+        return Optional
+            .ofNullable(subjects)
+            .map(
+                ss -> ss
+                    .stream()
                 .map(ProvisionModelSupport::mapPersonTopic)
                 .collect(Collectors.toList()))
             .orElse(null);
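The pattern in mapPersonTopics is worth naming: Optional.ofNullable gives a null-safe map over a possibly-null list, returning null (rather than an empty list or an exception) when the input is null. A generic sketch of the same idiom, offered as an illustration rather than a utility that exists in the project:

import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;

class NullSafeMapping {

    // Maps each element of a possibly-null list, or propagates null unchanged.
    static <T, R> List<R> mapOrNull(List<T> source, Function<T, R> mapper) {
        return Optional
            .ofNullable(source) // wrap the possibly-null list
            .map(ss -> ss.stream().map(mapper).collect(Collectors.toList()))
            .orElse(null); // keep the caller's null-means-absent convention
    }
}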