Merge pull request 'update-funders-apis' (#21) from update-funders-apis into master

Reviewed-on: #21
This commit is contained in:
Michele Artini 2024-07-15 14:06:51 +02:00
commit 7b855ab0bb
5 changed files with 251 additions and 194 deletions

View File

@ -1,97 +1,54 @@
package eu.dnetlib.openaire.funders;
import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Component;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import eu.dnetlib.openaire.dsm.dao.MongoLoggerClient;
import eu.dnetlib.openaire.exporter.exceptions.DsmApiException;
import eu.dnetlib.openaire.exporter.model.dsm.AggregationInfo;
import eu.dnetlib.openaire.exporter.model.dsm.AggregationStage;
import eu.dnetlib.openaire.exporter.exceptions.FundersApiException;
import eu.dnetlib.openaire.funders.domain.db.FunderDatasource;
import eu.dnetlib.openaire.funders.domain.db.FunderDbEntry;
import eu.dnetlib.openaire.funders.domain.db.FunderDbUpdate;
import eu.dnetlib.openaire.funders.domain.db.FunderPid;
@Component
@ConditionalOnProperty(value = "openaire.exporter.enable.funders", havingValue = "true")
public class FunderService {
private static final String TEMP_FILE_SUFFIX = ".funds.tmp";
private static final String SEPARATOR = "@=@";
@Autowired
private FunderRepository funderRepository;
@Autowired
private MongoLoggerClient mongoLoggerClient;
private JdbcTemplate jdbcTemplate;
private File tempDir;
public List<FunderDbEntry> getFunders() {
return StreamSupport.stream(funderRepository.findAll().spliterator(), false)
.map(this::patchFunder)
.collect(Collectors.toList());
private File tempFile;
private final DateTimeFormatter DATEFORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");
private static final Log log = LogFactory.getLog(FunderService.class);
@PostConstruct
public void init() {
tempDir = new File(System.getProperty("java.io.tmpdir", "/tmp"));
for (final File f : tempDir.listFiles((FilenameFilter) (dir, name) -> name.endsWith(TEMP_FILE_SUFFIX))) {
deleteFile(f);
}
new Thread(this::updateFunders).start();
/**
 * Looks up a single funder in the repository and returns it after applying
 * {@code patchFunder}.
 *
 * @param id
 *            identifier of the funder to load
 * @return the patched funder entry
 * @throws FundersApiException
 *             when the repository holds no funder with the given id
 */
public FunderDbEntry getFunder(final String id) throws FundersApiException {
    final FunderDbEntry stored = funderRepository.findById(id).orElse(null);
    if (stored == null) {
        throw new FundersApiException("Missing Funder: " + id);
    }
    return patchFunder(stored);
}
private void deleteFile(final File f) {
if (f != null && f.exists()) {
log.info("Deleting file: " + f.getAbsolutePath());
f.delete();
}
/**
 * Tells whether the funder repository contains an entry with the given id.
 *
 * @param id
 *            identifier to check
 * @return {@code true} when {@code funderRepository.existsById(id)} reports an existing entry
 */
public boolean isValidFunder(final String id) {
    final boolean known = funderRepository.existsById(id);
    return known;
}
@Scheduled(cron = "${openaire.exporter.funders.cron}")
public void updateFunders() {
try {
final ObjectMapper mapper = new ObjectMapper();
mapper.registerModule(new JavaTimeModule());
mapper.configure(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS, false);
final File tmp = File.createTempFile("funders-api-", TEMP_FILE_SUFFIX, tempDir);
log.info("Generating funders file: " + tmp.getAbsolutePath());
try (final FileWriter writer = new FileWriter(tmp)) {
writer.write("[");
boolean first = true;
for (final FunderDbEntry funder : funderRepository.findAll()) {
log.info(" - adding: " + funder.getId());
private FunderDbEntry patchFunder(final FunderDbEntry funder) {
// THIS PATCH IS NECESSARY FOR COMPATIBILITY WITH POSTGRES 9.3 (PARTIAL SUPPORT OF THE JSON LIBRARY)
final List<FunderDatasource> datasources = Arrays.stream(funder.getDatasourcesPostgres())
.filter(Objects::nonNull)
.map(s -> s.split(SEPARATOR))
@ -123,61 +80,48 @@ public class FunderService {
funder.setPids(pids);
// END PATCH
addAggregationHistory(funder);
if (first) {
first = false;
} else {
writer.write(",");
}
writer.write(mapper.writeValueAsString(funder));
}
writer.write("]");
log.info("Publish funders file: " + tmp.getAbsolutePath());
deleteFile(tempFile);
setTempFile(tmp);
}
} catch (final Throwable e) {
log.error("Error generating funders file", e);
throw new RuntimeException("Error generating funders file", e);
}
return funder;
}
private void addAggregationHistory(final FunderDbEntry funder) {
public void updateFunder(final String id, final FunderDbUpdate funderUpdate) {
final List<LocalDate> dates = funder.getDatasources()
.stream()
.map(FunderDatasource::getId)
.map(id -> {
try {
return mongoLoggerClient.getAggregationHistoryV2(id);
} catch (final DsmApiException e) {
log.error("Error retrieving the aggregation history", e);
throw new RuntimeException("Error retrieving the aggregation history", e);
}
})
.flatMap(List::stream)
.filter(AggregationInfo::isCompletedSuccessfully)
.filter(info -> info.getAggregationStage() == AggregationStage.TRANSFORM)
.map(AggregationInfo::getDate)
.distinct()
.map(s -> LocalDate.parse(s, DATEFORMATTER))
.sorted(Comparator.reverseOrder())
.limit(10)
.collect(Collectors.toList());
final String sql =
"UPDATE dsm_organizations SET ("
+ " legalshortname,"
+ " legalname,"
+ " websiteurl,"
+ " logourl,"
+ " country,"
+ " registered_funder"
+ ") = ("
+ " coalesce(?, legalshortname),"
+ " coalesce(?, legalname),"
+ " coalesce(?, websiteurl),"
+ " coalesce(?, logourl),"
+ " coalesce(?, country),"
+ " coalesce(?, registered_funder)"
+ ") WHERE id = ?";
funder.setAggregationDates(dates);
jdbcTemplate.update(sql, funderUpdate.getLegalShortName(), funderUpdate.getLegalName(), funderUpdate.getWebsiteUrl(), funderUpdate
.getLogoUrl(), funderUpdate.getCountry(), funderUpdate.getRegistered(), id);
if (funderUpdate.getPids() != null) {
funderUpdate.getPids().forEach(pid -> {
// TODO: the first update should be deleted after the re-implementation of the pid-tables,
// TODO: the field 'type' should also be moved in the second update
if (jdbcTemplate.queryForObject("SELECT count(*) FROM dsm_identities WHERE issuertype = ? AND pid = ?", Integer.class, pid.getType(), pid
.getValue()) == 0) {
jdbcTemplate.update("INSERT INTO dsm_identities(issuertype, pid) VALUES (?, ?)", pid.getType(), pid.getValue());
}
public File getTempFile() {
return tempFile;
if (jdbcTemplate.queryForObject("SELECT count(*) FROM dsm_organizationpids WHERE organization = ? AND pid = ?", Integer.class, id, pid
.getValue()) == 0) {
jdbcTemplate.update("INSERT INTO dsm_organizationpids(organization, pid) VALUES (?, ?)", id, pid.getValue());
}
});
}
public void setTempFile(final File tempFile) {
this.tempFile = tempFile;
}
}

View File

@ -1,25 +1,22 @@
package eu.dnetlib.openaire.funders;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.http.MediaType;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RestController;
import eu.dnetlib.openaire.common.AbstractExporterController;
import eu.dnetlib.openaire.exporter.exceptions.FundersApiException;
import eu.dnetlib.openaire.funders.domain.db.FunderDbEntry;
import eu.dnetlib.openaire.funders.domain.db.FunderDbUpdate;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
@ -46,29 +43,46 @@ public class FundersApiController extends AbstractExporterController {
@ApiResponse(responseCode = "200", description = "OK"),
@ApiResponse(responseCode = "500", description = "unexpected error")
})
public void getFunders(final HttpServletResponse res) throws FundersApiException {
res.setContentType(MediaType.APPLICATION_JSON_VALUE);
final File file = service.getTempFile();
if (file == null) {
log.error("Missing temp file (NULL)");
throw new FundersApiException("Missing temp file (NULL)");
// Returns every funder exposed by the service.
// Any failure is logged here and then rethrown unchanged to the caller.
public List<FunderDbEntry> getFunders() throws FundersApiException {
    final List<FunderDbEntry> funders;
    try {
        funders = service.getFunders();
    } catch (final Throwable failure) {
        log.error("Error getting funders", failure);
        throw failure;
    }
    return funders;
}
if (!file.exists()) {
log.error("Missing temp file " + service.getTempFile());
throw new FundersApiException("Missing temp file " + service.getTempFile());
/**
 * GET /funders/{id}: returns the funder identified by {@code id}.
 * Any failure raised by the service is logged and then rethrown unchanged.
 *
 * @param id
 *            funder identifier taken from the request path
 * @return the funder entry provided by the service
 * @throws FundersApiException
 *             propagated from the service lookup
 */
@RequestMapping(value = "/funders/{id}", produces = {
    "application/json"
}, method = RequestMethod.GET)
@Operation(summary = "get a funder by Id", description = "get a funder by Id")
@ApiResponses(value = {
    @ApiResponse(responseCode = "200", description = "OK"),
    @ApiResponse(responseCode = "500", description = "unexpected error")
})
public FunderDbEntry getFunder(@PathVariable final String id) throws FundersApiException {
    final FunderDbEntry funder;
    try {
        funder = service.getFunder(id);
    } catch (final Throwable failure) {
        log.error("Error getting funder: " + id, failure);
        throw failure;
    }
    return funder;
}
try (final InputStream in = new FileInputStream(file); OutputStream out = res.getOutputStream()) {
IOUtils.copy(in, out);
return;
} catch (final Exception e) {
log.error("Error reading file " + service.getTempFile(), e);
throw new FundersApiException("Error reading file " + service.getTempFile(), e);
/**
 * POST /funders/{id}: applies the given update to an existing funder and
 * returns the funder as it is read back from the service afterwards.
 *
 * @param id
 *            funder identifier taken from the request path
 * @param funderUpdate
 *            request body with the values to apply
 * @return the funder entry re-read after the update
 * @throws FundersApiException
 *             when {@code id} does not match an existing funder, or when the
 *             final lookup fails
 */
@RequestMapping(value = "/funders/{id}", produces = {
    "application/json"
}, method = RequestMethod.POST)
@Operation(summary = "update a funder by Id", description = "update a funder by Id")
@ApiResponses(value = {
    @ApiResponse(responseCode = "200", description = "OK"),
    @ApiResponse(responseCode = "500", description = "unexpected error")
})
public FunderDbEntry updateFunder(@PathVariable final String id, @RequestBody final FunderDbUpdate funderUpdate) throws FundersApiException {
    try {
        // Reject unknown ids up front so that no UPDATE/INSERT is attempted for them.
        if (!service.isValidFunder(id)) {
            throw new FundersApiException("Invalid funder: " + id);
        }
        service.updateFunder(id, funderUpdate);
        return service.getFunder(id);
    } catch (final Throwable e) {
        // Same log-and-rethrow handling as the GET endpoints, so update failures
        // (including database errors) are not lost from the application log.
        log.error("Error updating funder: " + id, e);
        throw e;
    }
}
}

View File

@ -1,7 +1,7 @@
package eu.dnetlib.openaire.funders.domain.db;
import java.io.Serializable;
import java.time.LocalDate;
import java.sql.Date;
import java.util.ArrayList;
import java.util.List;
@ -16,12 +16,14 @@ import org.hibernate.annotations.TypeDef;
import org.hibernate.annotations.TypeDefs;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.vladmihalcea.hibernate.type.array.DateArrayType;
import com.vladmihalcea.hibernate.type.array.StringArrayType;
@Entity
@Table(name = "funders_view")
@TypeDefs({
@TypeDef(name = "string-array", typeClass = StringArrayType.class)
@TypeDef(name = "string-array", typeClass = StringArrayType.class),
@TypeDef(name = "date-array", typeClass = DateArrayType.class),
})
public class FunderDbEntry implements Serializable {
@ -47,7 +49,7 @@ public class FunderDbEntry implements Serializable {
private String country;
@Column(name = "registrationdate")
private LocalDate registrationDate;
private Date registrationDate;
@Column(name = "registered")
private Boolean registered;
@ -68,8 +70,9 @@ public class FunderDbEntry implements Serializable {
@Transient
private List<FunderDatasource> datasources = new ArrayList<FunderDatasource>();
@Transient
private List<LocalDate> aggregationDates;
@Type(type = "date-array")
@Column(name = "aggregationdates", columnDefinition = "date[]")
private Date[] aggregationDates;
public String getId() {
return id;
@ -119,11 +122,11 @@ public class FunderDbEntry implements Serializable {
this.country = country;
}
public LocalDate getRegistrationDate() {
public Date getRegistrationDate() {
return registrationDate;
}
public void setRegistrationDate(final LocalDate registrationDate) {
public void setRegistrationDate(final Date registrationDate) {
this.registrationDate = registrationDate;
}
@ -167,11 +170,11 @@ public class FunderDbEntry implements Serializable {
this.datasources = datasources;
}
public List<LocalDate> getAggregationDates() {
public Date[] getAggregationDates() {
return aggregationDates;
}
public void setAggregationDates(final List<LocalDate> aggregationDates) {
public void setAggregationDates(final Date[] aggregationDates) {
this.aggregationDates = aggregationDates;
}

View File

@ -0,0 +1,74 @@
package eu.dnetlib.openaire.funders.domain.db;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
/**
 * Mutable bean describing the values submitted when a funder is updated.
 * <p>
 * It only stores the values and exposes them through getters and setters.
 * Every field except {@code pids} starts as {@code null}; {@code pids}
 * starts as an empty list.
 */
public class FunderDbUpdate implements Serializable {

    private static final long serialVersionUID = -9086478785780647627L;

    /** Short legal name of the funder. */
    private String legalShortName;

    /** Full legal name of the funder. */
    private String legalName;

    /** URL of the funder web site. */
    private String websiteUrl;

    /** URL of the funder logo. */
    private String logoUrl;

    /** Country of the funder. */
    private String country;

    /** Registration flag of the funder. */
    private Boolean registered;

    /** Persistent identifiers of the funder. */
    private List<FunderPid> pids = new ArrayList<>();

    /** @return the short legal name, possibly {@code null} */
    public String getLegalShortName() {
        return this.legalShortName;
    }

    /** @param legalShortName the short legal name to store */
    public void setLegalShortName(final String legalShortName) {
        this.legalShortName = legalShortName;
    }

    /** @return the legal name, possibly {@code null} */
    public String getLegalName() {
        return this.legalName;
    }

    /** @param legalName the legal name to store */
    public void setLegalName(final String legalName) {
        this.legalName = legalName;
    }

    /** @return the web site URL, possibly {@code null} */
    public String getWebsiteUrl() {
        return this.websiteUrl;
    }

    /** @param websiteUrl the web site URL to store */
    public void setWebsiteUrl(final String websiteUrl) {
        this.websiteUrl = websiteUrl;
    }

    /** @return the logo URL, possibly {@code null} */
    public String getLogoUrl() {
        return this.logoUrl;
    }

    /** @param logoUrl the logo URL to store */
    public void setLogoUrl(final String logoUrl) {
        this.logoUrl = logoUrl;
    }

    /** @return the country, possibly {@code null} */
    public String getCountry() {
        return this.country;
    }

    /** @param country the country to store */
    public void setCountry(final String country) {
        this.country = country;
    }

    /** @return the registration flag, possibly {@code null} */
    public Boolean getRegistered() {
        return this.registered;
    }

    /** @param registered the registration flag to store */
    public void setRegistered(final Boolean registered) {
        this.registered = registered;
    }

    /** @return the stored list of persistent identifiers (the same instance, not a copy) */
    public List<FunderPid> getPids() {
        return this.pids;
    }

    /** @param pids the list of persistent identifiers to store (kept by reference) */
    public void setPids(final List<FunderPid> pids) {
        this.pids = pids;
    }

}

View File

@ -1,6 +1,29 @@
ALTER TABLE dsm_organizations ADD COLUMN registered_funder boolean;
CREATE VIEW funders_view AS SELECT
-- Link table between a service and the organization that funds it.
-- At most one row per (funder, service) pair (composite primary key).
-- Rows disappear automatically when the referenced service or organization
-- is deleted (ON DELETE CASCADE on both foreign keys).
CREATE TABLE dsm_service_funder (
-- Defaults to a generated 'temp_<md5>_<md5>' value when no identifier is supplied.
_dnet_resource_identifier_ varchar(2048) DEFAULT 'temp_'||md5(clock_timestamp()::text)||'_'||md5(random()::text),
service text NOT NULL REFERENCES dsm_services(id) ON DELETE CASCADE,
funder text NOT NULL REFERENCES dsm_organizations(id) ON DELETE CASCADE,
-- Nullable: no NOT NULL constraint is declared for this column.
last_aggregation_date date,
PRIMARY KEY(funder, service)
);
-- Populates dsm_service_funder from the existing relations: one row per
-- (service, organization) pair linked through dsm_service_organization,
-- restricted to services that appear as projects.collectedfrom.
INSERT INTO dsm_service_funder(_dnet_resource_identifier_, service, funder, last_aggregation_date)
SELECT
-- Identifier built as '<organization id>@@<service id>'.
o.id||'@@'||s.id AS _dnet_resource_identifier_,
s.id AS service,
o.id AS funder,
-- Most recent aggregation date among the dsm_api rows of the service;
-- NULL when the service has no dsm_api row (LEFT OUTER JOIN below).
max(a.last_aggregation_date::date) AS last_aggregation_date
FROM
dsm_organizations o
JOIN dsm_service_organization so ON (o.id = so.organization)
JOIN dsm_services s ON (so.service = s.id)
-- Inner join: keeps only services referenced by at least one project.
JOIN projects p ON p.collectedfrom = s.id
LEFT OUTER JOIN dsm_api a ON (s.id = a.service)
-- Collapses the duplicates produced by the projects and dsm_api joins.
GROUP BY s.id, o.id;
CREATE OR REPLACE VIEW funders_view AS SELECT
o.id AS id,
o.legalshortname AS legalshortname,
o.legalname AS legalname,
@ -9,17 +32,16 @@ CREATE VIEW funders_view AS SELECT
o.country AS country,
o.dateofcollection AS registrationdate,
o.registered_funder AS registered,
array_agg(DISTINCT s.id||' @=@ '||s.officialname||' @=@ '||s.eosc_datasource_type) AS datasources,
array_agg(DISTINCT pids.issuertype||' @=@ '||pids.pid) AS pids
array_remove(array_agg(DISTINCT s.id||' @=@ '||s.officialname||' @=@ '||s.eosc_datasource_type), NULL) AS datasources,
array_remove(array_agg(DISTINCT sf.last_aggregation_date ORDER BY sf.last_aggregation_date DESC), NULL) AS aggregationdates,
array_remove(array_agg(DISTINCT pids.issuertype||' @=@ '||pids.pid), NULL) AS pids
FROM
dsm_organizations o
JOIN dsm_service_organization so ON (o.id = so.organization)
JOIN dsm_services s ON (so.service = s.id)
JOIN projects p ON (p.collectedfrom = s.id)
JOIN dsm_service_funder sf ON (o.id = sf.funder)
JOIN dsm_services s ON (sf.service = s.id)
LEFT OUTER JOIN dsm_organizationpids opids ON (o.id = opids.organization)
LEFT OUTER JOIN dsm_identities pids ON (opids.pid = pids.pid)
GROUP BY o.id;
GRANT ALL ON dsm_service_funder TO dnetapi;
GRANT ALL ON funders_view TO dnetapi;