Miriam Baglioni 2020-06-22 19:14:06 +02:00
parent 1566fd590e
commit 3da12be81f
5 changed files with 29 additions and 42 deletions

View File

@@ -35,11 +35,13 @@ public class Mapper implements Serializable {
 					externals.add(kv);
 				}
 				out.setUrl(Constants.PUBLICATION_URL + input.getId().substring(3));
+				externals.add(KeyValue.newInstance("result type", "publication"));
 				break;
 			case "dataset":
 				eu.dnetlib.dhp.schema.oaf.Dataset id = (eu.dnetlib.dhp.schema.oaf.Dataset) input;
 				Optional.ofNullable(id.getVersion()).ifPresent(v -> out.setVersion(v.getValue()));
 				out.setUrl(Constants.DATASET_URL + input.getId().substring(3));
+				externals.add(KeyValue.newInstance("result type", "dataset"));
 				break;
 			case "software":
@@ -59,10 +61,12 @@ public class Mapper implements Serializable {
 					.ifPresent(
 						value -> externals.add(KeyValue.newInstance("programming language", value.getClassname())));
 				out.setUrl(Constants.SOFTWARE_URL + input.getId().substring(3));
+				externals.add(KeyValue.newInstance("result type", "software"));
 				break;
 			case "other":
 				out.setUrl(Constants.ORP_URL + input.getId().substring(3));
+				externals.add(KeyValue.newInstance("result type", "other"));
 				break;
 		}
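
Reviewer note: the four additions above all follow one pattern, each branch of the result-type switch tags the catalogue entry with one "result type" external. A minimal self-contained sketch of that pattern; the KeyValue class below is a hypothetical stand-in written only so the snippet compiles on its own (the real one comes from the dump schema and exposes the same newInstance factory):

import java.util.ArrayList;
import java.util.List;

// Hypothetical stand-in for eu.dnetlib.dhp.schema.dump.oaf.KeyValue.
class KeyValue {
	final String key;
	final String value;

	private KeyValue(String key, String value) {
		this.key = key;
		this.value = value;
	}

	static KeyValue newInstance(String key, String value) {
		return new KeyValue(key, value);
	}
}

public class ResultTypeTaggingSketch {
	public static void main(String[] args) {
		List<KeyValue> externals = new ArrayList<>();
		// each case of the switch in Mapper adds exactly one tag like this:
		for (String type : new String[] { "publication", "dataset", "software", "other" }) {
			externals.add(KeyValue.newInstance("result type", type));
		}
		externals.forEach(kv -> System.out.println(kv.key + " -> " + kv.value));
	}
}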
@@ -100,13 +104,10 @@ public class Mapper implements Serializable {
 		final List<String> descriptionList = new ArrayList<>();
 		Optional
 			.ofNullable(input.getDescription())
-			.ifPresent(value -> {
-				Iterator<Field<String>> it = value.iterator();
-				if (it.hasNext()) {
-					out.setName(it.next().getValue());
-				}
-				it.forEachRemaining(v -> externals.add(KeyValue.newInstance("description", v.getValue())));
-			});
+			.ifPresent(value ->
+				getDescription(out, externals, value)
+			);
 		Optional
 			.ofNullable(input.getEmbargoenddate())
@@ -133,7 +134,6 @@
 					.ofNullable(v.getUrl())
 					.ifPresent(u -> u.forEach(url -> urlSet.add(url)));
 				}));
-
 		Optional
@@ -180,7 +180,6 @@
 				.add(
 					KeyValue
 						.newInstance("subject", s.getQualifier().getClassid() + ":" + s.getValue()))));
-		externals.add(KeyValue.newInstance("resource type", input.getResourcetype().getClassid()));
 		cfSet.forEach(cf -> externals.add(KeyValue.newInstance("collected from", cf)));
 		hbSet.forEach(hb -> externals.add(KeyValue.newInstance("hosted by", hb)));
@@ -193,31 +192,13 @@
 		return out;
 	}
 
-	private static eu.dnetlib.dhp.schema.dump.oaf.Author getAuthor(eu.dnetlib.dhp.schema.oaf.Author oa) {
-		eu.dnetlib.dhp.schema.dump.oaf.Author a = new eu.dnetlib.dhp.schema.dump.oaf.Author();
-		Optional
-			.ofNullable(oa.getAffiliation())
-			.ifPresent(
-				value -> a
-					.setAffiliation(
-						value
-							.stream()
-							.map(aff -> aff.getValue())
-							.collect(Collectors.toList())));
-		a.setFullname(oa.getFullname());
-		a.setName(oa.getName());
-		a.setSurname(oa.getSurname());
-		a.setRank(oa.getRank());
-		Optional
-			.ofNullable(oa.getPid())
-			.ifPresent(
-				value -> a
-					.setPid(
-						value
-							.stream()
-							.map(p -> ControlledField.newInstance(p.getQualifier().getClassid(), p.getValue()))
-							.collect(Collectors.toList())));
-		return a;
+	private static void getDescription(CatalogueEntry out, List<KeyValue> externals, List<Field<String>> value) {
+		Iterator<Field<String>> it = value.iterator();
+		if (it.hasNext()) {
+			out.setNotes(it.next().getValue());
+		}
+		it.forEachRemaining(v -> externals.add(KeyValue.newInstance("description", v.getValue())));
 	}
 }
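
Reviewer note: the extracted helper also changes where the first description lands, setNotes on the CatalogueEntry instead of the old inline setName, while every further description is demoted to a "description" external. A standalone sketch of that contract, with plain Strings standing in for Field<String> and local variables standing in for the entry (all names here are hypothetical; only the iterator logic mirrors the diff):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class DescriptionSplitSketch {
	public static void main(String[] args) {
		List<String> descriptions = Arrays.asList("main abstract", "second abstract", "third abstract");
		List<String> externals = new ArrayList<>();
		String notes = null;

		Iterator<String> it = descriptions.iterator();
		if (it.hasNext()) {
			notes = it.next(); // mirrors out.setNotes(it.next().getValue())
		}
		// mirrors it.forEachRemaining(v -> externals.add(KeyValue.newInstance("description", v.getValue())))
		it.forEachRemaining(v -> externals.add("description -> " + v));

		System.out.println("notes = " + notes); // notes = main abstract
		externals.forEach(System.out::println); // the two remaining abstracts
	}
}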

View File

@@ -122,7 +122,7 @@
 				</spark-opts>
 				<arg>--sourcePath</arg><arg>${sourcePath}/publication</arg>
 				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Publication</arg>
-				<arg>--outputPath</arg><arg>${workingDir}/publication</arg>
+				<arg>--outputPath</arg><arg>${outputPath}/publication</arg>
 				<arg>--communityName</arg><arg>${communityName}</arg>
 			</spark>
 			<ok to="join_dump"/>
@@ -148,7 +148,7 @@
 				</spark-opts>
 				<arg>--sourcePath</arg><arg>${sourcePath}/dataset</arg>
 				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Dataset</arg>
-				<arg>--outputPath</arg><arg>${workingDir}/dataset</arg>
+				<arg>--outputPath</arg><arg>${outputPath}/dataset</arg>
 				<arg>--communityName</arg><arg>${communityName}</arg>
 			</spark>
 			<ok to="join_dump"/>
@@ -174,7 +174,7 @@
 				</spark-opts>
 				<arg>--sourcePath</arg><arg>${sourcePath}/otherresearchproduct</arg>
 				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.OtherResearchProduct</arg>
-				<arg>--outputPath</arg><arg>${workingDir}/otherresearchproduct</arg>
+				<arg>--outputPath</arg><arg>${outputPath}/otherresearchproduct</arg>
 				<arg>--communityName</arg><arg>${communityName}</arg>
 			</spark>
 			<ok to="join_dump"/>
@@ -200,7 +200,7 @@
 				</spark-opts>
 				<arg>--sourcePath</arg><arg>${sourcePath}/software</arg>
 				<arg>--resultTableName</arg><arg>eu.dnetlib.dhp.schema.oaf.Software</arg>
-				<arg>--outputPath</arg><arg>${workingDir}/software</arg>
+				<arg>--outputPath</arg><arg>${outputPath}/software</arg>
 				<arg>--communityName</arg><arg>${communityName}</arg>
 			</spark>
 			<ok to="join_dump"/>
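
Reviewer note: the four substitutions are identical; each per-type dump now lands under the caller-supplied ${outputPath} instead of ${workingDir}, which these workflows normally treat as scratch space, so the catalogue files survive the run. A sketch of how outputPath might be declared, assuming the usual Oozie workflow parameters block (the declaration itself is outside the lines shown in this diff):

<parameters>
	<property>
		<name>outputPath</name>
		<description>target path of the catalogue dump; unlike workingDir it is not treated as a temporary location</description>
	</property>
</parameters>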

View File

@@ -69,17 +69,17 @@ public class DumpJobTest {
 	}
 
 	@Test
-	public void testDataset() throws Exception {
+	public void testSoftware() throws Exception {
 		final String sourcePath = getClass()
-			.getResource("/eu/dnetlib/dhp/oa/graph/dump/resultDump/dataset.json")
+			.getResource("/eu/dnetlib/dhp/oa/graph/dump/gcat/software.json")
 			.getPath();
 
 		SparkDumpRISISCatalogue.main(new String[] {
 			"-isSparkSessionManaged", Boolean.FALSE.toString(),
 			"-outputPath", workingDir.toString() + "/result",
 			"-sourcePath", sourcePath,
-			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Dataset",
+			"-resultTableName", "eu.dnetlib.dhp.schema.oaf.Software",
 			"-communityName", "risis"
 		});
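
Reviewer note: only the fixture path, method name, and result table name change; the invocation itself is untouched. A hypothetical follow-up sanity check, not part of this commit, assuming the job writes newline-delimited JSON into Spark part files under workingDir/result:

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;

// Counts the records the dump wrote; assumes newline-delimited JSON output.
public class CountDumpedRecordsSketch {
	public static void main(String[] args) throws IOException {
		Path result = Paths.get(args.length > 0 ? args[0] : "/tmp/result");
		try (Stream<Path> files = Files.list(result)) {
			long records = files
				.filter(p -> p.getFileName().toString().startsWith("part-")) // Spark output files
				.flatMap(p -> {
					try {
						return Files.lines(p); // flatMap closes each substream after use
					} catch (IOException e) {
						throw new RuntimeException(e);
					}
				})
				.filter(line -> !line.isEmpty())
				.count();
			System.out.println("dumped records: " + records);
		}
	}
}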

File diff suppressed because one or more lines are too long