forked from D-Net/dnet-hadoop
Merge branch 'master' into graph_cleaning
This commit is contained in:
commit
953da4a427
|
@ -63,6 +63,8 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
protected final VocabularyGroup vocs;
|
protected final VocabularyGroup vocs;
|
||||||
|
|
||||||
|
private final boolean invisible;
|
||||||
|
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
|
protected static final String DATACITE_SCHEMA_KERNEL_4 = "http://datacite.org/schema/kernel-4";
|
||||||
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
protected static final String DATACITE_SCHEMA_KERNEL_3 = "http://datacite.org/schema/kernel-3";
|
||||||
protected static final Qualifier ORCID_PID_TYPE = qualifier(
|
protected static final Qualifier ORCID_PID_TYPE = qualifier(
|
||||||
|
@ -85,8 +87,9 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
|
protected static final Qualifier MAIN_TITLE_QUALIFIER = qualifier(
|
||||||
"main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
"main title", "main title", "dnet:dataCite_title", "dnet:dataCite_title");
|
||||||
|
|
||||||
protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs) {
|
protected AbstractMdRecordToOafMapper(final VocabularyGroup vocs, final boolean invisible) {
|
||||||
this.vocs = vocs;
|
this.vocs = vocs;
|
||||||
|
this.invisible = invisible;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Oaf> processMdRecord(final String xml) {
|
public List<Oaf> processMdRecord(final String xml) {
|
||||||
|
@ -112,7 +115,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final DataInfo info = prepareDataInfo(doc);
|
final DataInfo info = prepareDataInfo(doc, invisible);
|
||||||
final long lastUpdateTimestamp = new Date().getTime();
|
final long lastUpdateTimestamp = new Date().getTime();
|
||||||
|
|
||||||
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
return createOafs(doc, type, collectedFrom, hostedBy, info, lastUpdateTimestamp);
|
||||||
|
@ -510,11 +513,11 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
|
return oaiIProvenance(identifier, baseURL, metadataNamespace, altered, datestamp, harvestDate);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DataInfo prepareDataInfo(final Document doc) {
|
protected DataInfo prepareDataInfo(final Document doc, final boolean invisible) {
|
||||||
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
final Node n = doc.selectSingleNode("//oaf:datainfo");
|
||||||
|
|
||||||
if (n == null) {
|
if (n == null) {
|
||||||
return dataInfo(false, null, false, false, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
|
return dataInfo(false, null, false, invisible, REPOSITORY_PROVENANCE_ACTIONS, "0.9");
|
||||||
}
|
}
|
||||||
|
|
||||||
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
final String paClassId = n.valueOf("./oaf:provenanceaction/@classid");
|
||||||
|
@ -528,7 +531,7 @@ public abstract class AbstractMdRecordToOafMapper {
|
||||||
final String trust = n.valueOf("./oaf:trust");
|
final String trust = n.valueOf("./oaf:trust");
|
||||||
|
|
||||||
return dataInfo(
|
return dataInfo(
|
||||||
deletedbyinference, inferenceprovenance, inferred, false,
|
deletedbyinference, inferenceprovenance, inferred, invisible,
|
||||||
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
qualifier(paClassId, paClassName, paSchemeId, paSchemeName), trust);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -140,10 +140,14 @@ public class GenerateEntitiesApplication {
|
||||||
final String type = StringUtils.substringAfter(id, ":");
|
final String type = StringUtils.substringAfter(id, ":");
|
||||||
|
|
||||||
switch (type.toLowerCase()) {
|
switch (type.toLowerCase()) {
|
||||||
case "native_oaf":
|
case "oaf-store-cleaned":
|
||||||
return new OafToOafMapper(vocs).processMdRecord(s);
|
return new OafToOafMapper(vocs, false).processMdRecord(s);
|
||||||
case "native_odf":
|
case "odf-store-cleaned":
|
||||||
return new OdfToOafMapper(vocs).processMdRecord(s);
|
return new OdfToOafMapper(vocs, false).processMdRecord(s);
|
||||||
|
case "oaf-store-intersection":
|
||||||
|
return new OafToOafMapper(vocs, true).processMdRecord(s);
|
||||||
|
case "odf-store-intersection":
|
||||||
|
return new OdfToOafMapper(vocs, true).processMdRecord(s);
|
||||||
case "datasource":
|
case "datasource":
|
||||||
return Arrays.asList(convertFromJson(s, Datasource.class));
|
return Arrays.asList(convertFromJson(s, Datasource.class));
|
||||||
case "organization":
|
case "organization":
|
||||||
|
|
|
@ -26,8 +26,7 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio
|
||||||
IOUtils
|
IOUtils
|
||||||
.toString(
|
.toString(
|
||||||
MigrateMongoMdstoresApplication.class
|
MigrateMongoMdstoresApplication.class
|
||||||
.getResourceAsStream(
|
.getResourceAsStream("/eu/dnetlib/dhp/oa/graph/migrate_mongo_mstores_parameters.json")));
|
||||||
"/eu/dnetlib/dhp/oa/graph/migrate_mongo_mstores_parameters.json")));
|
|
||||||
parser.parseArgument(args);
|
parser.parseArgument(args);
|
||||||
|
|
||||||
final String mongoBaseUrl = parser.get("mongoBaseUrl");
|
final String mongoBaseUrl = parser.get("mongoBaseUrl");
|
||||||
|
@ -60,7 +59,7 @@ public class MigrateMongoMdstoresApplication extends AbstractMigrationApplicatio
|
||||||
final String currentColl = entry.getValue();
|
final String currentColl = entry.getValue();
|
||||||
|
|
||||||
for (final String xml : mdstoreClient.listRecords(currentColl)) {
|
for (final String xml : mdstoreClient.listRecords(currentColl)) {
|
||||||
emit(xml, "native_" + format);
|
emit(xml, String.format("%s-%s-%s", format, layout, interpretation));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,8 +31,8 @@ import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||||
|
|
||||||
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
public class OafToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
public OafToOafMapper(final VocabularyGroup vocs) {
|
public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible) {
|
||||||
super(vocs);
|
super(vocs, invisible);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -32,8 +32,8 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper {
|
||||||
|
|
||||||
public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/";
|
public static final String HTTP_DX_DOI_PREIFX = "http://dx.doi.org/";
|
||||||
|
|
||||||
public OdfToOafMapper(final VocabularyGroup vocs) {
|
public OdfToOafMapper(final VocabularyGroup vocs, final boolean invisible) {
|
||||||
super(vocs);
|
super(vocs, invisible);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -210,6 +210,23 @@
|
||||||
<arg>--mdLayout</arg><arg>store</arg>
|
<arg>--mdLayout</arg><arg>store</arg>
|
||||||
<arg>--mdInterpretation</arg><arg>cleaned</arg>
|
<arg>--mdInterpretation</arg><arg>cleaned</arg>
|
||||||
</java>
|
</java>
|
||||||
|
<ok to="ImportOAF_invisible"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="ImportOAF_invisible">
|
||||||
|
<java>
|
||||||
|
<prepare>
|
||||||
|
<delete path="${contentPath}/oaf_records_invisible"/>
|
||||||
|
</prepare>
|
||||||
|
<main-class>eu.dnetlib.dhp.oa.graph.raw.MigrateMongoMdstoresApplication</main-class>
|
||||||
|
<arg>--hdfsPath</arg><arg>${contentPath}/oaf_records_invisible</arg>
|
||||||
|
<arg>--mongoBaseUrl</arg><arg>${mongoURL}</arg>
|
||||||
|
<arg>--mongoDb</arg><arg>${mongoDb}</arg>
|
||||||
|
<arg>--mdFormat</arg><arg>OAF</arg>
|
||||||
|
<arg>--mdLayout</arg><arg>store</arg>
|
||||||
|
<arg>--mdInterpretation</arg><arg>intersection</arg>
|
||||||
|
</java>
|
||||||
<ok to="wait_import"/>
|
<ok to="wait_import"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
@ -237,7 +254,7 @@
|
||||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
</spark-opts>
|
</spark-opts>
|
||||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims</arg>
|
<arg>--sourcePaths</arg><arg>${contentPath}/db_claims,${contentPath}/oaf_claims,${contentPath}/odf_claims,${contentPath}/oaf_records_invisible</arg>
|
||||||
<arg>--targetPath</arg><arg>${workingDir}/entities_claim</arg>
|
<arg>--targetPath</arg><arg>${workingDir}/entities_claim</arg>
|
||||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
</spark>
|
</spark>
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
package eu.dnetlib.dhp.oa.graph.raw;
|
package eu.dnetlib.dhp.oa.graph.raw;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
import static org.mockito.ArgumentMatchers.anyString;
|
import static org.mockito.ArgumentMatchers.anyString;
|
||||||
|
@ -55,7 +56,7 @@ public class MappersTest {
|
||||||
|
|
||||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml"));
|
final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml"));
|
||||||
|
|
||||||
final List<Oaf> list = new OafToOafMapper(vocs).processMdRecord(xml);
|
final List<Oaf> list = new OafToOafMapper(vocs, false).processMdRecord(xml);
|
||||||
|
|
||||||
assertEquals(3, list.size());
|
assertEquals(3, list.size());
|
||||||
assertTrue(list.get(0) instanceof Publication);
|
assertTrue(list.get(0) instanceof Publication);
|
||||||
|
@ -69,6 +70,7 @@ public class MappersTest {
|
||||||
assertValidId(p.getId());
|
assertValidId(p.getId());
|
||||||
assertValidId(p.getCollectedfrom().get(0).getKey());
|
assertValidId(p.getCollectedfrom().get(0).getKey());
|
||||||
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
assertTrue(StringUtils.isNotBlank(p.getTitle().get(0).getValue()));
|
||||||
|
assertFalse(p.getDataInfo().getInvisible());
|
||||||
|
|
||||||
assertTrue(p.getAuthor().size() > 0);
|
assertTrue(p.getAuthor().size() > 0);
|
||||||
final Optional<Author> author = p
|
final Optional<Author> author = p
|
||||||
|
@ -134,11 +136,27 @@ public class MappersTest {
|
||||||
// System.out.println(new ObjectMapper().writeValueAsString(r2));
|
// System.out.println(new ObjectMapper().writeValueAsString(r2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testPublicationInvisible() throws IOException {
|
||||||
|
|
||||||
|
final String xml = IOUtils.toString(getClass().getResourceAsStream("oaf_record.xml"));
|
||||||
|
|
||||||
|
final List<Oaf> list = new OafToOafMapper(vocs, true).processMdRecord(xml);
|
||||||
|
|
||||||
|
assertTrue(list.size() > 0);
|
||||||
|
assertTrue(list.get(0) instanceof Publication);
|
||||||
|
|
||||||
|
final Publication p = (Publication) list.get(0);
|
||||||
|
|
||||||
|
assertTrue(p.getDataInfo().getInvisible());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testDataset() throws IOException {
|
void testDataset() throws IOException {
|
||||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml"));
|
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_dataset.xml"));
|
||||||
|
|
||||||
final List<Oaf> list = new OdfToOafMapper(vocs).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
||||||
|
|
||||||
assertEquals(3, list.size());
|
assertEquals(3, list.size());
|
||||||
assertTrue(list.get(0) instanceof Dataset);
|
assertTrue(list.get(0) instanceof Dataset);
|
||||||
|
@ -220,7 +238,7 @@ public class MappersTest {
|
||||||
void testSoftware() throws IOException {
|
void testSoftware() throws IOException {
|
||||||
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml"));
|
final String xml = IOUtils.toString(getClass().getResourceAsStream("odf_software.xml"));
|
||||||
|
|
||||||
final List<Oaf> list = new OdfToOafMapper(vocs).processMdRecord(xml);
|
final List<Oaf> list = new OdfToOafMapper(vocs, false).processMdRecord(xml);
|
||||||
|
|
||||||
assertEquals(1, list.size());
|
assertEquals(1, list.size());
|
||||||
assertTrue(list.get(0) instanceof Software);
|
assertTrue(list.get(0) instanceof Software);
|
||||||
|
|
Loading…
Reference in New Issue