dump #50

Merged
claudio.atzori merged 98 commits from miriam.baglioni/dnet-hadoop:dump into master 2020-11-04 18:07:01 +01:00
4 changed files with 11 additions and 13 deletions
Showing only changes of commit dc5096a327 - Show all commits

View File

@ -3,11 +3,13 @@ package eu.dnetlib.dhp.oa.graph.dump;
import java.io.Serializable;
import java.util.Optional;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.common.api.MissingConceptDoiException;
import eu.dnetlib.dhp.common.api.ZenodoAPIClient;

View File

@ -23,13 +23,11 @@ import eu.dnetlib.dhp.schema.oaf.KeyValue;
import eu.dnetlib.dhp.schema.oaf.Result;
/**
* Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity.
* The new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context
* related to communities and research initiative/infrastructures.
*
* For collectedfrom elements it creates: datasource -> provides -> result and result -> isProvidedBy -> datasource
* For hostedby elements it creates: datasource -> hosts -> result and result -> isHostedBy -> datasource
* For context elements it creates: context <-> isRelatedTo <-> result
* Creates new Relations (as in eu.dnetlib.dhp.schema.dump.oaf.graph.Relation) from the information in the Entity. The
* new Relations are created for the datasource in the collectedfrom and hostedby elements and for the context related
* to communities and research initiative/infrastructures. For collectedfrom elements it creates: datasource -> provides
* -> result and result -> isProvidedBy -> datasource. For hostedby elements it creates: datasource -> hosts -> result
* and result -> isHostedBy -> datasource. For context elements it creates: context <-> isRelatedTo <-> result.
*/
public class Extractor implements Serializable {

View File

@ -16,9 +16,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.Provenance;
import eu.dnetlib.dhp.schema.dump.oaf.graph.*;
/**
* It processes the ContextInfo information to produce a new Context Entity or a set of Relations between the
* generic context entity and datasource/projects related to the context.
*
* It processes the ContextInfo information to produce a new Context Entity or a set of Relations between the generic
* context entity and datasource/projects related to the context.
*/
public class Process implements Serializable {
private static final Logger log = LoggerFactory.getLogger(Process.class);

View File

@ -1,5 +1,4 @@
package eu.dnetlib.dhp.oa.graph.dump.graph;
import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession;
@ -28,8 +27,8 @@ import eu.dnetlib.dhp.schema.dump.oaf.graph.RelType;
import eu.dnetlib.dhp.schema.oaf.Relation;
/**
* Create new Relations between Context Entities and Organizations whose products are associated to the context.
* It produces relations such as: organization <-> isRelatedTo <-> context
* Create new Relations between Context Entities and Organizations whose products are associated to the context. It
* produces relations such as: organization <-> isRelatedTo <-> context
*/
public class SparkOrganizationRelation implements Serializable {
private static final Logger log = LoggerFactory.getLogger(SparkOrganizationRelation.class);