forked from D-Net/dnet-hadoop
EOSC Services - removed fields from mapping, testing preparation
This commit is contained in:
parent
a8c51f6f16
commit
b6a7ff3a99
|
@ -148,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
log.info("Processing Organizations...");
|
||||
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||
|
||||
log.info("Processing relationsNoRemoval ds <-> orgs ...");
|
||||
log.info("Processing relations services <-> orgs ...");
|
||||
smdbe
|
||||
.execute(
|
||||
"queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
|
||||
"queryServiceOrganization.sql", smdbe::processServiceOrganization,
|
||||
verifyNamespacePrefix);
|
||||
|
||||
log.info("Processing projects <-> orgs ...");
|
||||
|
@ -268,13 +268,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
|
||||
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
|
||||
ds.setLanguages(listValues(rs.getArray("languages")));
|
||||
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
|
||||
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
|
||||
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
|
||||
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
|
||||
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
|
||||
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
|
||||
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
|
||||
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
|
||||
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
|
||||
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
|
||||
|
@ -293,10 +290,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
rs.getString("issnLinking"), info)); // Journal
|
||||
|
||||
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
|
||||
ds.setProvidedproducttypes(listValues(rs.getArray("providedproducttypes")));
|
||||
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
||||
ds.setThematic(rs.getBoolean("thematic"));
|
||||
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
|
||||
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
||||
ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
|
||||
ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
|
||||
|
@ -434,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
|||
}
|
||||
}
|
||||
|
||||
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
|
||||
public List<Oaf> processServiceOrganization(final ResultSet rs) {
|
||||
try {
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
||||
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
|
||||
final String dsId = createOpenaireId(10, rs.getString("service"), true);
|
||||
final List<KeyValue> collectedFrom = listKeyValues(
|
||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||
|
||||
|
|
|
@ -30,6 +30,11 @@
|
|||
<value></value>
|
||||
<description>a blacklist of nsprefixes (comma separeted)</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>reuseContent</name>
|
||||
<value>false</value>
|
||||
<description>reuse content in the aggregator database</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>sparkDriverMemory</name>
|
||||
<description>memory for driver process</description>
|
||||
|
@ -85,12 +90,20 @@
|
|||
</configuration>
|
||||
</global>
|
||||
|
||||
<start to="ImportDB"/>
|
||||
<start to="reuse_db"/>
|
||||
|
||||
<kill name="Kill">
|
||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||
</kill>
|
||||
|
||||
<decision name="reuse_db">
|
||||
<switch>
|
||||
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
|
||||
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
|
||||
<default to="ImportDB"/>
|
||||
</switch>
|
||||
</decision>
|
||||
|
||||
<action name="ImportDB">
|
||||
<java>
|
||||
<prepare>
|
||||
|
@ -125,6 +138,55 @@
|
|||
<arg>--action</arg><arg>claims</arg>
|
||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||
</java>
|
||||
<ok to="GenerateEntities"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateEntities">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateEntities</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
</spark-opts>
|
||||
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
|
||||
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||
<arg>--shouldHashId</arg><arg>true</arg>
|
||||
</spark>
|
||||
<ok to="GenerateGraph"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
||||
<action name="GenerateGraph">
|
||||
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||
<master>yarn</master>
|
||||
<mode>cluster</mode>
|
||||
<name>GenerateGraph</name>
|
||||
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
|
||||
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||
<spark-opts>
|
||||
--executor-memory ${sparkExecutorMemory}
|
||||
--executor-cores ${sparkExecutorCores}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=7680
|
||||
</spark-opts>
|
||||
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
|
||||
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
|
||||
</spark>
|
||||
<ok to="End"/>
|
||||
<error to="Kill"/>
|
||||
</action>
|
||||
|
|
|
@ -109,7 +109,6 @@ SELECT
|
|||
d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
|
||||
d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
||||
d.thematic AS thematic,
|
||||
-- REMOVED ???: d.knowledge_graph AS knowledgegraph,
|
||||
array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
|
||||
nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
|
||||
nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,
|
||||
|
|
|
@ -129,9 +129,6 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
getValueAsList("odlanguages", fields),
|
||||
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
|
||||
assertEquals(
|
||||
getValueAsList("odcontenttypes", fields),
|
||||
ds.getOdcontenttypes().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||
assertEquals(
|
||||
getValueAsList("accessinfopackage", fields),
|
||||
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||
|
@ -155,13 +152,11 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
|
||||
|
||||
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
|
||||
assertEquals(getValueAsList("providedproducttypes", fields), ds.getProvidedproducttypes());
|
||||
|
||||
assertEquals("National", ds.getJurisdiction().getClassid());
|
||||
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
||||
|
||||
assertTrue(ds.getThematic());
|
||||
assertTrue(ds.getKnowledgegraph());
|
||||
|
||||
HashSet<String> cpSchemeId = ds
|
||||
.getContentpolicies()
|
||||
|
@ -246,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest {
|
|||
public void testProcessDatasourceOrganization() throws Exception {
|
||||
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
||||
|
||||
final List<Oaf> list = app.processDatasourceOrganization(rs);
|
||||
final List<Oaf> list = app.processServiceOrganization(rs);
|
||||
|
||||
assertEquals(2, list.size());
|
||||
verifyMocks(fields);
|
||||
|
|
|
@ -140,13 +140,6 @@
|
|||
"Swedish"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "odcontenttypes",
|
||||
"type": "array",
|
||||
"value": [
|
||||
"Journal articles"
|
||||
]
|
||||
},
|
||||
{
|
||||
"field": "accessinfopackage",
|
||||
"type": "array",
|
||||
|
@ -169,16 +162,6 @@
|
|||
"type": "string",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "dataprovider",
|
||||
"type": "boolean",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "serviceprovider",
|
||||
"type": "boolean",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"field": "databaseaccesstype",
|
||||
"type": "string",
|
||||
|
|
|
@ -254,24 +254,6 @@
|
|||
}
|
||||
],
|
||||
"languages" : [ "English", "German" ],
|
||||
"odcontenttypes": [
|
||||
{
|
||||
"value": "Journal articles",
|
||||
"dataInfo": {
|
||||
"invisible": false,
|
||||
"inferred": false,
|
||||
"deletedbyinference": false,
|
||||
"trust": "0.900",
|
||||
"inferenceprovenance": null,
|
||||
"provenanceaction": {
|
||||
"classid": "sysimport:crosswalk:entityregistry",
|
||||
"classname": "sysimport:crosswalk:entityregistry",
|
||||
"schemeid": "dnet:provenanceActions",
|
||||
"schemename": "dnet:provenanceActions"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"accessinfopackage": [
|
||||
{
|
||||
"value": "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai",
|
||||
|
@ -387,7 +369,6 @@
|
|||
"schemename": "eosc:jurisdictions"
|
||||
},
|
||||
"thematic": true,
|
||||
"knowledgegraph": true,
|
||||
"contentpolicies": [
|
||||
{
|
||||
"classid": "Journal article",
|
||||
|
|
Loading…
Reference in New Issue