137 lines
2.3 KiB
Java
137 lines
2.3 KiB
Java
package eu.dnetlib.bioschemas.api.crawl;
|
|
|
|
import java.util.Date;
|
|
|
|
import hwu.elixir.utils.Validation;
|
|
|
|
|
|
/**
|
|
*
|
|
* Store the current status of a single URL in the scrape service.
|
|
*
|
|
*
|
|
*/
|
|
|
|
|
|
public class CrawlRecord {
|
|
|
|
private Long id;
|
|
|
|
private String context = "";
|
|
|
|
private String url;
|
|
|
|
private Date dateScraped;
|
|
|
|
private StatusOfScrape status;
|
|
|
|
private boolean beingScraped;
|
|
|
|
private String name;
|
|
|
|
private String nquads;
|
|
|
|
public CrawlRecord() {
|
|
status = StatusOfScrape.UNTRIED;
|
|
}
|
|
|
|
public CrawlRecord(String url) {
|
|
Validation validation = new Validation();
|
|
if(validation.validateURI(url)) {
|
|
this.url = url;
|
|
context = "";
|
|
status = StatusOfScrape.UNTRIED;
|
|
dateScraped = null;
|
|
} else {
|
|
throw new IllegalArgumentException(url +" is not a valid url");
|
|
}
|
|
this.setId(System.currentTimeMillis());
|
|
}
|
|
|
|
public Long getId() {
|
|
return id;
|
|
}
|
|
|
|
public void setId(Long id) {
|
|
this.id = id;
|
|
}
|
|
|
|
public String getUrl() {
|
|
return url;
|
|
}
|
|
|
|
public Date getDateScraped() {
|
|
return dateScraped;
|
|
}
|
|
|
|
public void setDateScraped(Date dateScraped) {
|
|
this.dateScraped = dateScraped;
|
|
}
|
|
|
|
public StatusOfScrape getStatus() {
|
|
return status;
|
|
}
|
|
|
|
public void setStatus(StatusOfScrape status) {
|
|
this.status = status;
|
|
}
|
|
|
|
public String getContext() {
|
|
return context;
|
|
}
|
|
|
|
public void setContext(String context) {
|
|
this.context = context;
|
|
}
|
|
|
|
public boolean isBeingScraped() {
|
|
return beingScraped;
|
|
}
|
|
|
|
public void setBeingScraped(boolean beingScraped) {
|
|
this.beingScraped = beingScraped;
|
|
}
|
|
|
|
public String getName() {
|
|
return name;
|
|
}
|
|
|
|
public void setName(String name) {
|
|
this.name = name;
|
|
}
|
|
|
|
public String getNquads() {
|
|
return nquads;
|
|
}
|
|
|
|
public void setNquads(String nquads) {
|
|
this.nquads = nquads;
|
|
}
|
|
|
|
@Override
|
|
public boolean equals(Object o) {
|
|
if (this == o)
|
|
return true;
|
|
if (!(o instanceof CrawlRecord))
|
|
return false;
|
|
|
|
CrawlRecord otherCrawl = (CrawlRecord) o;
|
|
|
|
if(this.url.equals(otherCrawl.getUrl())) {
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
@Override
|
|
public int hashCode() {
|
|
int result = getId() != null ? getId().hashCode() : 0;
|
|
result = 31 * result + (getUrl() != null ? getUrl().hashCode() : 0);
|
|
result = 31 * result + (getContext() != null ? getContext().hashCode() : 0);
|
|
result = 31 * result + (getDateScraped() != null ? getDateScraped().hashCode() : 0);
|
|
return result;
|
|
}
|
|
|
|
}
|