forked from D-Net/dnet-hadoop
partial implementation of entities from db
This commit is contained in:
parent
81f82b5d34
commit
b35c59eb42
|
@ -76,6 +76,41 @@
|
|||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<!--This plugin's configuration is used to store Eclipse m2e settings only. It has no influence on the Maven build itself.-->
|
||||
<plugin>
|
||||
<groupId>org.eclipse.m2e</groupId>
|
||||
<artifactId>lifecycle-mapping</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<configuration>
|
||||
<lifecycleMappingMetadata>
|
||||
<pluginExecutions>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>
|
||||
org.apache.maven.plugins
|
||||
</groupId>
|
||||
<artifactId>
|
||||
maven-plugin-plugin
|
||||
</artifactId>
|
||||
<versionRange>
|
||||
[3.2,)
|
||||
</versionRange>
|
||||
<goals>
|
||||
<goal>descriptor</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore></ignore>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
</pluginExecutions>
|
||||
</lifecycleMappingMetadata>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -5,6 +5,11 @@ import java.util.List;
|
|||
|
||||
public class Relation implements Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
private static final long serialVersionUID = -9103706796710618813L;
|
||||
|
||||
private String source;
|
||||
|
||||
private String target;
|
||||
|
@ -17,7 +22,7 @@ public class Relation implements Serializable {
|
|||
return source;
|
||||
}
|
||||
|
||||
public void setSource(String source) {
|
||||
public void setSource(final String source) {
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
|
@ -25,7 +30,7 @@ public class Relation implements Serializable {
|
|||
return target;
|
||||
}
|
||||
|
||||
public void setTarget(String target) {
|
||||
public void setTarget(final String target) {
|
||||
this.target = target;
|
||||
}
|
||||
|
||||
|
@ -33,7 +38,7 @@ public class Relation implements Serializable {
|
|||
return provenance;
|
||||
}
|
||||
|
||||
public void setProvenance(List<Provenance> provenance) {
|
||||
public void setProvenance(final List<Provenance> provenance) {
|
||||
this.provenance = provenance;
|
||||
}
|
||||
|
||||
|
@ -41,7 +46,7 @@ public class Relation implements Serializable {
|
|||
return semantic;
|
||||
}
|
||||
|
||||
public void setSemantic(RelationSemantic semantic) {
|
||||
public void setSemantic(final RelationSemantic semantic) {
|
||||
this.semantic = semantic;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,11 +2,17 @@ package eu.dnetlib.dhp.migration;
|
|||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.sql.Array;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
|
@ -15,14 +21,21 @@ import eu.dnetlib.dhp.schema.oaf.DataInfo;
|
|||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Qualifier;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.StructuredProperty;
|
||||
|
||||
public class MigrateDbEntitiesApplication extends AbstractMigrateApplication implements Closeable {
|
||||
|
||||
private static final Qualifier ENTITYREGISTRY_PROVENANCE_ACTION = MigrationUtils
|
||||
.qualifier("sysimport:crosswalk:entityregistry", "sysimport:crosswalk:entityregistry", "dnet:provenance_actions", "dnet:provenance_actions");
|
||||
|
||||
private static final Log log = LogFactory.getLog(MigrateDbEntitiesApplication.class);
|
||||
|
||||
private final DbClient dbClient;
|
||||
|
||||
private final long lastUpdateTimestamp;
|
||||
|
||||
public static void main(final String[] args) throws Exception {
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils.toString(MigrateDbEntitiesApplication.class.getResourceAsStream("/eu/dnetlib/dhp/migration/migrate_db_entities_parameters.json")));
|
||||
|
@ -51,6 +64,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
final String dbPassword) throws Exception {
|
||||
super(hdfsPath, hdfsNameNode, hdfsUser);
|
||||
this.dbClient = new DbClient(dbUrl, dbUser, dbPassword);
|
||||
this.lastUpdateTimestamp = new Date().getTime();
|
||||
}
|
||||
|
||||
public void execute(final String sqlFile, final Consumer<ResultSet> consumer) throws Exception {
|
||||
|
@ -61,7 +75,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
public void processDatasource(final ResultSet rs) {
|
||||
try {
|
||||
|
||||
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
|
||||
final Datasource ds = new Datasource();
|
||||
|
||||
|
@ -74,8 +88,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
ds.setExtraInfo(null); // TODO
|
||||
ds.setOaiprovenance(null); // TODO
|
||||
|
||||
ds.setDatasourcetype(null); // Qualifier datasourcetype) {
|
||||
ds.setOpenairecompatibility(null); // Qualifier openairecompatibility) {
|
||||
ds.setDatasourcetype(prepareQualifierSplitting(rs.getString("datasourcetype")));
|
||||
ds.setOpenairecompatibility(prepareQualifierSplitting(rs.getString("openairecompatibility")));
|
||||
ds.setOfficialname(MigrationUtils.field(rs.getString("officialname"), info));
|
||||
ds.setEnglishname(MigrationUtils.field(rs.getString("englishname"), info));
|
||||
ds.setWebsiteurl(MigrationUtils.field(rs.getString("websiteurl"), info));
|
||||
|
@ -86,7 +100,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
ds.setLongitude(MigrationUtils.field(Double.toString(rs.getDouble("longitude")), info));
|
||||
ds.setDateofvalidation(MigrationUtils.field(rs.getDate("dateofvalidation").toString(), info));
|
||||
ds.setDescription(MigrationUtils.field(rs.getString("description"), info));
|
||||
ds.setSubjects(null); // List<StructuredProperty> subjects) {
|
||||
ds.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
|
||||
ds.setOdnumberofitems(MigrationUtils.field(Double.toString(rs.getInt("odnumberofitems")), info));
|
||||
ds.setOdnumberofitemsdate(MigrationUtils.field(rs.getDate("odnumberofitemsdate").toString(), info));
|
||||
ds.setOdpolicies(MigrationUtils.field(rs.getString("odpolicies"), info));
|
||||
|
@ -110,12 +124,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
ds.setPolicies(null); // List<KeyValue> // TODO
|
||||
ds.setJournal(null); // Journal // TODO
|
||||
|
||||
ds.setDataInfo(info);
|
||||
ds.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
// rs.getString("datasourceid");
|
||||
rs.getArray("identities");
|
||||
// rs.getString("officialname");
|
||||
// rs.getString("englishname");
|
||||
// rs.getString("contactemail");
|
||||
rs.getString("openairecompatibility"); // COMPLEX ...@@@...
|
||||
// rs.getString("openairecompatibility"); // COMPLEX ...@@@...
|
||||
// rs.getString("websiteurl");
|
||||
// rs.getString("logourl");
|
||||
// rs.getArray("accessinfopackage");
|
||||
|
@ -124,15 +141,15 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
// rs.getString("namespaceprefix");
|
||||
// rs.getInt("odnumberofitems"); // NULL
|
||||
// rs.getDate("odnumberofitemsdate"); // NULL
|
||||
rs.getArray("subjects");
|
||||
// rs.getArray("subjects");
|
||||
// rs.getString("description");
|
||||
// rs.getString("odpolicies"); // NULL
|
||||
// rs.getArray("odlanguages");
|
||||
// rs.getArray("odcontenttypes");
|
||||
rs.getBoolean("inferred"); // false
|
||||
rs.getBoolean("deletedbyinference");// false
|
||||
rs.getDouble("trust"); // 0.9
|
||||
rs.getString("inferenceprovenance"); // NULL
|
||||
// rs.getBoolean("inferred"); // false
|
||||
// rs.getBoolean("deletedbyinference");// false
|
||||
// rs.getDouble("trust"); // 0.9
|
||||
// rs.getString("inferenceprovenance"); // NULL
|
||||
// rs.getDate("dateofcollection");
|
||||
// rs.getDate("dateofvalidation");
|
||||
// rs.getDate("releasestartdate");
|
||||
|
@ -152,21 +169,22 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
rs.getArray("policies");
|
||||
// rs.getString("collectedfromid");
|
||||
// rs.getString("collectedfromname");
|
||||
rs.getString("datasourcetype"); // COMPLEX XXX@@@@....
|
||||
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
|
||||
// rs.getString("datasourcetype"); // COMPLEX XXX@@@@....
|
||||
// rs.getString("provenanceaction"); //
|
||||
// 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
|
||||
// AS provenanceaction,
|
||||
rs.getString("journal"); // CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
|
||||
|
||||
emitOaf(ds);
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void processProject(final ResultSet rs) {
|
||||
try {
|
||||
|
||||
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
|
||||
final Project p = new Project();
|
||||
|
||||
|
@ -192,9 +210,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
p.setEcsc39(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsc39")), info));
|
||||
p.setOamandatepublications(MigrationUtils.field(Boolean.toString(rs.getBoolean("oamandatepublications")), info));
|
||||
p.setEcarticle29_3(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecarticle29_3")), info));
|
||||
p.setSubjects(null); // List<StructuredProperty> //TODO
|
||||
p.setSubjects(prepareListOfStructProps(rs.getArray("subjects"), info));
|
||||
p.setFundingtree(null); // List<Field<String>> //TODO
|
||||
p.setContracttype(null); // Qualifier //TODO
|
||||
p.setContracttype(prepareQualifierSplitting(rs.getString("contracttype")));
|
||||
p.setOptional1(MigrationUtils.field(rs.getString("optional1"), info));
|
||||
p.setOptional2(MigrationUtils.field(rs.getString("optional2"), info));
|
||||
p.setJsonextrainfo(MigrationUtils.field(rs.getString("jsonextrainfo"), info));
|
||||
|
@ -207,6 +225,9 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
p.setTotalcost(new Float(rs.getDouble("totalcost")));
|
||||
p.setFundedamount(new Float(rs.getDouble("fundedamount")));
|
||||
|
||||
p.setDataInfo(info);
|
||||
p.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
// rs.getString("projectid");
|
||||
// rs.getString("code");
|
||||
// rs.getString("websiteurl");
|
||||
|
@ -222,13 +243,13 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
// rs.getBoolean("ecarticle29_3");
|
||||
// rs.getDate("dateofcollection");
|
||||
// rs.getDate("dateoftransformation");
|
||||
rs.getBoolean("inferred");
|
||||
rs.getBoolean("deletedbyinference");
|
||||
rs.getDouble("trust");
|
||||
rs.getString("inferenceprovenance");
|
||||
// rs.getBoolean("inferred");
|
||||
// rs.getBoolean("deletedbyinference");
|
||||
// rs.getDouble("trust");
|
||||
// rs.getString("inferenceprovenance");
|
||||
// rs.getString("optional1");
|
||||
// rs.getString("optional2");
|
||||
rs.getString("jsonextrainfo");
|
||||
// rs.getString("jsonextrainfo");
|
||||
// rs.getString("contactfullname");
|
||||
// rs.getString("contactfax");
|
||||
// rs.getString("contactphone");
|
||||
|
@ -248,14 +269,14 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
emitOaf(p);
|
||||
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void processOrganization(final ResultSet rs) {
|
||||
try {
|
||||
|
||||
final DataInfo info = MigrationUtils.dataInfo(null, null, null, null, null, null); // TODO
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
|
||||
final Organization o = new Organization();
|
||||
|
||||
|
@ -269,7 +290,7 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
o.setOaiprovenance(null); // OAIProvenance // TODO
|
||||
o.setLegalshortname(MigrationUtils.field("legalshortname", info));
|
||||
o.setLegalname(MigrationUtils.field("legalname", info));
|
||||
o.setAlternativeNames(null); // List<Field<String>> //TODO
|
||||
o.setAlternativeNames(new ArrayList<>());
|
||||
o.setWebsiteurl(MigrationUtils.field("websiteurl", info));
|
||||
o.setLogourl(MigrationUtils.field("logourl", info));
|
||||
o.setEclegalbody(MigrationUtils.field(Boolean.toString(rs.getBoolean("eclegalbody")), info));
|
||||
|
@ -283,7 +304,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
o.setEcenterprise(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecenterprise")), info));
|
||||
o.setEcsmevalidated(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecsmevalidated")), info));
|
||||
o.setEcnutscode(MigrationUtils.field(Boolean.toString(rs.getBoolean("ecnutscode")), info));
|
||||
o.setCountry(null); // Qualifier country) {
|
||||
o.setCountry(prepareQualifierSplitting(rs.getString("country")));
|
||||
|
||||
o.setDataInfo(info);
|
||||
o.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
|
||||
// rs.getString("organizationid");
|
||||
// rs.getString("legalshortname");
|
||||
|
@ -300,87 +324,160 @@ public class MigrateDbEntitiesApplication extends AbstractMigrateApplication imp
|
|||
// rs.getBoolean("ecenterprise");
|
||||
// rs.getBoolean("ecsmevalidated");
|
||||
// rs.getBoolean("ecnutscode");
|
||||
rs.getDate("dateofcollection");
|
||||
rs.getDate("dateoftransformation");
|
||||
rs.getBoolean("inferred");
|
||||
rs.getBoolean("deletedbyinference");
|
||||
rs.getDouble("trust");
|
||||
rs.getString("inferenceprovenance");
|
||||
// rs.getDate("dateofcollection");
|
||||
// rs.getDate("dateoftransformation");
|
||||
// rs.getBoolean("inferred");
|
||||
// rs.getBoolean("deletedbyinference");
|
||||
// rs.getDouble("trust");
|
||||
// rs.getString("inferenceprovenance");
|
||||
// rs.getString("collectedfromid");
|
||||
// rs.getString("collectedfromname");
|
||||
rs.getString("country");
|
||||
// rs.getString("country");
|
||||
rs.getString("provenanceaction");
|
||||
rs.getArray("pid");
|
||||
|
||||
emitOaf(o);
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void processDatasourceOrganization(final ResultSet rs) {
|
||||
|
||||
try {
|
||||
final Relation r = new Relation();
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
final String orgId = MigrationUtils.createOpenaireId("20", rs.getString("organization"));
|
||||
final String dsId = MigrationUtils.createOpenaireId("10", rs.getString("datasource"));
|
||||
|
||||
r.setRelType(null); // TODO
|
||||
r.setSubRelType(null); // TODO
|
||||
r.setRelClass(null); // TODO
|
||||
r.setSource(null); // TODO
|
||||
r.setTarget(null); // TODO
|
||||
r.setCollectedFrom(MigrationUtils.listKeyValues("", ""));
|
||||
final Relation r1 = new Relation();
|
||||
r1.setRelType("datasourceOrganization");
|
||||
r1.setSubRelType("provision");
|
||||
r1.setRelClass("isProvidedBy");
|
||||
r1.setSource(dsId);
|
||||
r1.setTarget(orgId);
|
||||
r1.setCollectedFrom(null);// TODO
|
||||
r1.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
emitOaf(r1);
|
||||
|
||||
rs.getString("datasource");
|
||||
rs.getString("organization");
|
||||
rs.getDate("startdate"); // NULL
|
||||
rs.getDate("enddate"); // NULL
|
||||
rs.getBoolean("inferred"); // false
|
||||
rs.getBoolean("deletedbyinference"); // false
|
||||
rs.getDouble("trust"); // 0.9
|
||||
rs.getString("inferenceprovenance"); // NULL
|
||||
rs.getString("semantics"); // 'providedBy@@@provided
|
||||
final Relation r2 = new Relation();
|
||||
r2.setRelType("datasourceOrganization");
|
||||
r2.setSubRelType("provision");
|
||||
r2.setRelClass("provides");
|
||||
r2.setSource(orgId);
|
||||
r2.setTarget(dsId);
|
||||
r2.setCollectedFrom(null); // TODO
|
||||
r2.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
emitOaf(r2);
|
||||
|
||||
// rs.getString("datasource");
|
||||
// rs.getString("organization");
|
||||
// rs.getDate("startdate"); // NULL
|
||||
// rs.getDate("enddate"); // NULL
|
||||
// rs.getBoolean("inferred"); // false
|
||||
// rs.getBoolean("deletedbyinference"); // false
|
||||
// rs.getDouble("trust"); // 0.9
|
||||
// rs.getString("inferenceprovenance"); // NULL
|
||||
// rs.getString("semantics"); // 'providedBy@@@provided
|
||||
// by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS
|
||||
// semantics,
|
||||
rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction ||
|
||||
// rs.getString("provenanceaction"); // d.provenanceaction || '@@@' || d.provenanceaction ||
|
||||
// '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
|
||||
|
||||
emitOaf(r);
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void processProjectOrganization(final ResultSet rs) {
|
||||
try {
|
||||
final Relation r = new Relation();
|
||||
final DataInfo info = prepareDataInfo(rs);
|
||||
final String orgId = MigrationUtils.createOpenaireId("20", rs.getString("resporganization"));
|
||||
final String projectId = MigrationUtils.createOpenaireId("40", rs.getString("project"));
|
||||
|
||||
r.setRelType(null); // TODO
|
||||
r.setSubRelType(null); // TODO
|
||||
r.setRelClass(null); // TODO
|
||||
r.setSource(null); // TODO
|
||||
r.setTarget(null); // TODO
|
||||
r.setCollectedFrom(null);
|
||||
final Relation r1 = new Relation();
|
||||
r1.setRelType("projectOrganization");
|
||||
r1.setSubRelType("participation");
|
||||
r1.setRelClass("isParticipant");
|
||||
r1.setSource(projectId);
|
||||
r1.setTarget(orgId);
|
||||
r1.setCollectedFrom(null);// TODO
|
||||
r1.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
emitOaf(r1);
|
||||
|
||||
rs.getString("project");
|
||||
rs.getString("resporganization");
|
||||
rs.getInt("participantnumber");
|
||||
rs.getDouble("contribution");
|
||||
rs.getDate("startdate");// null
|
||||
rs.getDate("enddate");// null
|
||||
rs.getBoolean("inferred");// false
|
||||
rs.getBoolean("deletedbyinference"); // false
|
||||
rs.getDouble("trust");
|
||||
rs.getString("inferenceprovenance"); // NULL
|
||||
rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
|
||||
final Relation r2 = new Relation();
|
||||
r2.setRelType("projectOrganization");
|
||||
r2.setSubRelType("participation");
|
||||
r2.setRelClass("hasParticipant");
|
||||
r2.setSource(orgId);
|
||||
r2.setTarget(projectId);
|
||||
r2.setCollectedFrom(null); // TODO
|
||||
r2.setDataInfo(info);
|
||||
r1.setLastupdatetimestamp(lastUpdateTimestamp);
|
||||
emitOaf(r2);
|
||||
|
||||
// rs.getString("project");
|
||||
// rs.getString("resporganization");
|
||||
// rs.getInt("participantnumber");
|
||||
// rs.getDouble("contribution");
|
||||
// rs.getDate("startdate");// null
|
||||
// rs.getDate("enddate");// null
|
||||
// rs.getBoolean("inferred");// false
|
||||
// rs.getBoolean("deletedbyinference"); // false
|
||||
// rs.getDouble("trust");
|
||||
// rs.getString("inferenceprovenance"); // NULL
|
||||
// rs.getString("semantics"); // po.semanticclass || '@@@' || po.semanticclass ||
|
||||
// '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
|
||||
rs.getString("provenanceaction"); // 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
|
||||
// rs.getString("provenanceaction"); //
|
||||
// 'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions'
|
||||
// AS provenanceaction
|
||||
emitOaf(r);
|
||||
|
||||
} catch (final Exception e) {
|
||||
// TODO: handle exception
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private DataInfo prepareDataInfo(final ResultSet rs) throws SQLException {
|
||||
final Boolean deletedbyinference = rs.getBoolean("deletedbyinference");
|
||||
final String inferenceprovenance = rs.getString("inferenceprovenance");
|
||||
final Boolean inferred = rs.getBoolean("inferred");
|
||||
final String trust = rs.getString("trust");
|
||||
return MigrationUtils.dataInfo(deletedbyinference, inferenceprovenance, inferred, false, ENTITYREGISTRY_PROVENANCE_ACTION, trust);
|
||||
}
|
||||
|
||||
private Qualifier prepareQualifierSplitting(final String s) {
|
||||
if (StringUtils.isBlank(s)) { return null; }
|
||||
final String[] arr = s.split("@@@");
|
||||
return arr.length == 4 ? MigrationUtils.qualifier(arr[0], arr[1], arr[2], arr[3]) : null;
|
||||
}
|
||||
|
||||
private StructuredProperty prepareStructProp(final String s, final DataInfo dataInfo) {
|
||||
if (StringUtils.isBlank(s)) { return null; }
|
||||
final String[] parts = s.split("###");
|
||||
if (parts.length == 2) {
|
||||
final String value = parts[0];
|
||||
final String[] arr = parts[1].split("@@@");
|
||||
if (arr.length == 4) { return MigrationUtils.structuredProperty(value, arr[0], arr[1], arr[2], arr[3], dataInfo); }
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private List<StructuredProperty> prepareListOfStructProps(final Array array, final DataInfo dataInfo) throws SQLException {
|
||||
final List<StructuredProperty> res = new ArrayList<>();
|
||||
if (array != null) {
|
||||
for (final String s : (String[]) array.getArray()) {
|
||||
final StructuredProperty sp = prepareStructProp(s, dataInfo);
|
||||
if (sp != null) {
|
||||
res.add(sp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
|
|
|
@ -10,6 +10,37 @@
|
|||
|
||||
<artifactId>dhp-dedup</artifactId>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>initialize</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>scala-test-compile</id>
|
||||
<phase>process-test-resources</phase>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
|
|
|
@ -1,10 +1,18 @@
|
|||
package eu.dnetlib.dhp.graph;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import eu.dnetlib.dhp.schema.oaf.*;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
import eu.dnetlib.dhp.schema.oaf.Dataset;
|
||||
import eu.dnetlib.dhp.schema.oaf.Datasource;
|
||||
import eu.dnetlib.dhp.schema.oaf.Organization;
|
||||
import eu.dnetlib.dhp.schema.oaf.OtherResearchProduct;
|
||||
import eu.dnetlib.dhp.schema.oaf.Project;
|
||||
import eu.dnetlib.dhp.schema.oaf.Publication;
|
||||
import eu.dnetlib.dhp.schema.oaf.Relation;
|
||||
import eu.dnetlib.dhp.schema.oaf.Software;
|
||||
|
||||
public class GraphMappingUtils {
|
||||
|
||||
public final static Map<String, Class> types = Maps.newHashMap();
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
package eu.dnetlib.dhp.graph;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.spark.api.java.JavaRDD;
|
||||
|
@ -9,13 +7,18 @@ import org.apache.spark.api.java.JavaSparkContext;
|
|||
import org.apache.spark.sql.Encoders;
|
||||
import org.apache.spark.sql.SaveMode;
|
||||
import org.apache.spark.sql.SparkSession;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
|
||||
import scala.Tuple2;
|
||||
|
||||
public class SparkGraphImporterJob {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
public static void main(final String[] args) throws Exception {
|
||||
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_parameters.json")));
|
||||
final ArgumentApplicationParser parser = new ArgumentApplicationParser(
|
||||
IOUtils.toString(SparkGraphImporterJob.class.getResourceAsStream("/eu/dnetlib/dhp/graph/input_graph_parameters.json")));
|
||||
parser.parseArgument(args);
|
||||
final SparkSession spark = SparkSession
|
||||
.builder()
|
||||
|
|
37
pom.xml
37
pom.xml
|
@ -1,4 +1,6 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>eu.dnetlib.dhp</groupId>
|
||||
|
@ -232,6 +234,12 @@
|
|||
<version>1.0.0</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mongodb</groupId>
|
||||
<artifactId>mongo-java-driver</artifactId>
|
||||
<version>${mongodb.driver.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.oozie</groupId>
|
||||
<artifactId>oozie-client</artifactId>
|
||||
|
@ -349,31 +357,7 @@
|
|||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>net.alchim31.maven</groupId>
|
||||
<artifactId>scala-maven-plugin</artifactId>
|
||||
<version>4.0.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>scala-compile-first</id>
|
||||
<phase>initialize</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>scala-test-compile</id>
|
||||
<phase>process-test-resources</phase>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<scalaVersion>${scala.version}</scalaVersion>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
</plugins>
|
||||
|
||||
<extensions>
|
||||
|
@ -421,6 +405,7 @@
|
|||
<dhp.jackson.version>2.9.6</dhp.jackson.version>
|
||||
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
|
||||
<scala.version>2.11.12</scala.version>
|
||||
<mongodb.driver.version>3.4.2</mongodb.driver.version>
|
||||
</properties>
|
||||
</project>
|
||||
|
||||
|
|
Loading…
Reference in New Issue