2021-04-15 02:36:08 +02:00
|
|
|
package eu.openaire.urls_controller.models;
|
|
|
|
|
2021-04-24 20:05:21 +02:00
|
|
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
2021-04-15 02:36:08 +02:00
|
|
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
2021-04-24 20:05:21 +02:00
|
|
|
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
|
2021-04-15 02:36:08 +02:00
|
|
|
|
2021-11-30 18:59:46 +01:00
|
|
|
import java.sql.Timestamp;
|
2021-04-15 02:36:08 +02:00
|
|
|
|
|
|
|
|
2021-04-24 20:05:21 +02:00
|
|
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
|
|
|
@JsonPropertyOrder({
|
2021-07-05 13:04:39 +02:00
|
|
|
"id",
|
|
|
|
"original_url",
|
|
|
|
"datasource",
|
2021-04-24 20:05:21 +02:00
|
|
|
"workerId",
|
Avoid assigning the same publications multiple times to the Workers, after the recent "parallelization enchantment".
After that enchantment, each worker could request multiple assignment-batches, before its previous batches were processed by the Controller. This means that for each batch that was processed, the Controller was deleting from the "assignment" table, all the assignments (-batches) delivered to the Worker that brought that batch, even though the "attempt" and "payload" records for the rest of the batches were not inserted in the DB yet. So in a new assignments-batch request, the same publications that were already under processing, were delivered to the same or other Workers.
Now, for each finished batch, only the assignments of that batch are deleted from the "assignment" table.
2023-07-11 16:27:23 +02:00
|
|
|
"assignments_batch_counter",
|
2021-11-30 18:59:46 +01:00
|
|
|
"timestamp"
|
2021-04-24 20:05:21 +02:00
|
|
|
})
|
2021-09-23 14:08:52 +02:00
|
|
|
public class Assignment {
|
2021-04-15 02:36:08 +02:00
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
@JsonProperty("id")
|
|
|
|
private String id;
|
2021-06-09 04:48:54 +02:00
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
@JsonProperty("original_url")
|
|
|
|
private String originalUrl;
|
2021-04-15 02:36:08 +02:00
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
@JsonProperty("datasource")
|
|
|
|
private Datasource datasource;
|
|
|
|
|
|
|
|
@JsonProperty("workerid")
|
2021-04-24 20:05:21 +02:00
|
|
|
private String workerId;
|
2021-04-15 02:36:08 +02:00
|
|
|
|
Avoid assigning the same publications multiple times to the Workers, after the recent "parallelization enchantment".
After that enchantment, each worker could request multiple assignment-batches, before its previous batches were processed by the Controller. This means that for each batch that was processed, the Controller was deleting from the "assignment" table, all the assignments (-batches) delivered to the Worker that brought that batch, even though the "attempt" and "payload" records for the rest of the batches were not inserted in the DB yet. So in a new assignments-batch request, the same publications that were already under processing, were delivered to the same or other Workers.
Now, for each finished batch, only the assignments of that batch are deleted from the "assignment" table.
2023-07-11 16:27:23 +02:00
|
|
|
@JsonProperty("assignments_batch_counter")
|
|
|
|
private long assignmentsBatchCounter;
|
|
|
|
|
2021-11-30 18:59:46 +01:00
|
|
|
@JsonProperty("timestamp")
|
|
|
|
private Timestamp timestamp;
|
2021-04-24 20:05:21 +02:00
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
|
|
|
|
public Assignment() {}
|
|
|
|
|
Avoid assigning the same publications multiple times to the Workers, after the recent "parallelization enchantment".
After that enchantment, each worker could request multiple assignment-batches, before its previous batches were processed by the Controller. This means that for each batch that was processed, the Controller was deleting from the "assignment" table, all the assignments (-batches) delivered to the Worker that brought that batch, even though the "attempt" and "payload" records for the rest of the batches were not inserted in the DB yet. So in a new assignments-batch request, the same publications that were already under processing, were delivered to the same or other Workers.
Now, for each finished batch, only the assignments of that batch are deleted from the "assignment" table.
2023-07-11 16:27:23 +02:00
|
|
|
|
|
|
|
public Assignment(String id, String originalUrl, Datasource datasource, String workerId, long assignmentsBatchCounter, Timestamp timestamp) {
|
2021-07-05 13:04:39 +02:00
|
|
|
this.id = id;
|
|
|
|
this.originalUrl = originalUrl;
|
|
|
|
this.datasource = datasource;
|
2021-04-24 20:05:21 +02:00
|
|
|
this.workerId = workerId;
|
Avoid assigning the same publications multiple times to the Workers, after the recent "parallelization enchantment".
After that enchantment, each worker could request multiple assignment-batches, before its previous batches were processed by the Controller. This means that for each batch that was processed, the Controller was deleting from the "assignment" table, all the assignments (-batches) delivered to the Worker that brought that batch, even though the "attempt" and "payload" records for the rest of the batches were not inserted in the DB yet. So in a new assignments-batch request, the same publications that were already under processing, were delivered to the same or other Workers.
Now, for each finished batch, only the assignments of that batch are deleted from the "assignment" table.
2023-07-11 16:27:23 +02:00
|
|
|
this.assignmentsBatchCounter = assignmentsBatchCounter;
|
2021-11-30 18:59:46 +01:00
|
|
|
this.timestamp = timestamp;
|
2021-04-24 20:05:21 +02:00
|
|
|
}
|
2021-04-15 02:36:08 +02:00
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
|
|
|
|
public String getId() {
|
|
|
|
return id;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setId(String id) {
|
|
|
|
this.id = id;
|
|
|
|
}
|
|
|
|
|
|
|
|
public String getOriginalUrl() {
|
|
|
|
return originalUrl;
|
2021-06-09 04:48:54 +02:00
|
|
|
}
|
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
public void setOriginalUrl(String originalUrl) {
|
|
|
|
this.originalUrl = originalUrl;
|
2021-06-09 04:48:54 +02:00
|
|
|
}
|
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
public Datasource getDatasource() {
|
|
|
|
return datasource;
|
2021-04-15 02:36:08 +02:00
|
|
|
}
|
|
|
|
|
2021-07-05 13:04:39 +02:00
|
|
|
public void setDatasource(Datasource datasource) {
|
|
|
|
this.datasource = datasource;
|
2021-04-15 02:36:08 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
public String getWorkerId() {
|
|
|
|
return workerId;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setWorkerId(String workerId) {
|
|
|
|
this.workerId = workerId;
|
|
|
|
}
|
|
|
|
|
Avoid assigning the same publications multiple times to the Workers, after the recent "parallelization enchantment".
After that enchantment, each worker could request multiple assignment-batches, before its previous batches were processed by the Controller. This means that for each batch that was processed, the Controller was deleting from the "assignment" table, all the assignments (-batches) delivered to the Worker that brought that batch, even though the "attempt" and "payload" records for the rest of the batches were not inserted in the DB yet. So in a new assignments-batch request, the same publications that were already under processing, were delivered to the same or other Workers.
Now, for each finished batch, only the assignments of that batch are deleted from the "assignment" table.
2023-07-11 16:27:23 +02:00
|
|
|
public long getAssignmentsBatchCounter() {
|
|
|
|
return assignmentsBatchCounter;
|
|
|
|
}
|
|
|
|
|
|
|
|
public void setAssignmentsBatchCounter(long assignmentsBatchCounter) {
|
|
|
|
this.assignmentsBatchCounter = assignmentsBatchCounter;
|
|
|
|
}
|
|
|
|
|
2021-11-30 18:59:46 +01:00
|
|
|
public Timestamp getTimestamp() {
|
|
|
|
return timestamp;
|
2021-04-15 02:36:08 +02:00
|
|
|
}
|
|
|
|
|
2021-11-30 18:59:46 +01:00
|
|
|
public void setTimestamp(Timestamp timestamp) {
|
|
|
|
this.timestamp = timestamp;
|
2021-04-15 02:36:08 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String toString() {
|
|
|
|
return "Assignment{" +
|
2021-07-05 13:04:39 +02:00
|
|
|
"id='" + id + '\'' +
|
|
|
|
", originalUrl='" + originalUrl + '\'' +
|
|
|
|
", datasource=" + datasource +
|
2021-04-15 02:36:08 +02:00
|
|
|
", workerId='" + workerId + '\'' +
|
Avoid assigning the same publications multiple times to the Workers, after the recent "parallelization enchantment".
After that enchantment, each worker could request multiple assignment-batches, before its previous batches were processed by the Controller. This means that for each batch that was processed, the Controller was deleting from the "assignment" table, all the assignments (-batches) delivered to the Worker that brought that batch, even though the "attempt" and "payload" records for the rest of the batches were not inserted in the DB yet. So in a new assignments-batch request, the same publications that were already under processing, were delivered to the same or other Workers.
Now, for each finished batch, only the assignments of that batch are deleted from the "assignment" table.
2023-07-11 16:27:23 +02:00
|
|
|
", assignmentsBatchCounter=" + assignmentsBatchCounter +
|
2021-11-30 18:59:46 +01:00
|
|
|
", timestamp=" + timestamp +
|
2021-04-15 02:36:08 +02:00
|
|
|
'}';
|
|
|
|
}
|
Avoid assigning the same publications multiple times to the Workers, after the recent "parallelization enchantment".
After that enchantment, each worker could request multiple assignment-batches, before its previous batches were processed by the Controller. This means that for each batch that was processed, the Controller was deleting from the "assignment" table, all the assignments (-batches) delivered to the Worker that brought that batch, even though the "attempt" and "payload" records for the rest of the batches were not inserted in the DB yet. So in a new assignments-batch request, the same publications that were already under processing, were delivered to the same or other Workers.
Now, for each finished batch, only the assignments of that batch are deleted from the "assignment" table.
2023-07-11 16:27:23 +02:00
|
|
|
|
2021-04-15 02:36:08 +02:00
|
|
|
}
|