Added method getVREHashtagsWithOccurrenceFilteredByTime to filter out hashtags that are too old
git-svn-id: http://svn.research-infrastructures.eu/public/d4science/gcube/trunk/portal/social-networking-library@128535 82a268e6-3cf1-43bd-a215-b396298e98cf
This commit is contained in:
parent
df008d8b66
commit
fb3dfaa024
|
@ -3,6 +3,7 @@ package org.gcube.portal.databook.server;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Calendar;
|
import java.util.Calendar;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -1703,6 +1704,59 @@ public final class DBCassandraAstyanaxImpl implements DatabookStore {
|
||||||
}
|
}
|
||||||
return toReturn;
|
return toReturn;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Map<String, Integer> getVREHashtagsWithOccurrenceFilteredByTime(String vreid, long timestamp){
|
||||||
|
OperationResult<Rows<String, String>> result = null;
|
||||||
|
try {
|
||||||
|
result = conn.getKeyspace().prepareQuery(cf_HashtagsCounter)
|
||||||
|
.getKeySlice(vreid)
|
||||||
|
.execute();
|
||||||
|
} catch (ConnectionException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
HashMap<String, Integer> toReturn = new HashMap<String, Integer> ();
|
||||||
|
|
||||||
|
// Iterate rows and their columns
|
||||||
|
for (Row<String, String> row : result.getResult()) {
|
||||||
|
for (Column<String> column : row.getColumns()) {
|
||||||
|
|
||||||
|
// retrieve the feeds list for this hashtag
|
||||||
|
List<Feed> feeds = null;
|
||||||
|
try{
|
||||||
|
feeds = getVREFeedsByHashtag(vreid, column.getName());
|
||||||
|
}catch(Exception e){
|
||||||
|
_log.error("Unable to retrieve the list of feeds for hashtag" + column.getName() + " in vre " + vreid);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(feeds.isEmpty()){
|
||||||
|
|
||||||
|
_log.info("There are no feeds containing hashtag " + column.getName() + " in vre " + vreid);
|
||||||
|
continue;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// retrieve the most recent one among these feeds
|
||||||
|
Collections.sort(feeds, Collections.reverseOrder());
|
||||||
|
|
||||||
|
if(feeds.get(0).getTime().getTime() < timestamp){
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// else..
|
||||||
|
int curValue = Integer.parseInt(column.getStringValue());
|
||||||
|
|
||||||
|
if (curValue > 0)
|
||||||
|
toReturn.put(column.getName(), curValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return toReturn;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -1,27 +1,25 @@
|
||||||
package org.gcube.portal.databook.server;
|
package org.gcube.portal.databook.server;
|
||||||
|
|
||||||
import static org.junit.Assert.assertTrue;
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Calendar;
|
||||||
import java.util.ArrayList;
|
import java.util.Collections;
|
||||||
import java.util.Date;
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.UUID;
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import org.gcube.portal.databook.shared.Attachment;
|
|
||||||
import org.gcube.portal.databook.shared.Comment;
|
|
||||||
import org.gcube.portal.databook.shared.Feed;
|
import org.gcube.portal.databook.shared.Feed;
|
||||||
import org.gcube.portal.databook.shared.FeedType;
|
import org.gcube.portal.databook.shared.ex.ColumnNameNotFoundException;
|
||||||
import org.gcube.portal.databook.shared.PrivacyLevel;
|
|
||||||
import org.gcube.portal.databook.shared.ex.CommentIDNotFoundException;
|
|
||||||
import org.gcube.portal.databook.shared.ex.FeedIDNotFoundException;
|
import org.gcube.portal.databook.shared.ex.FeedIDNotFoundException;
|
||||||
|
import org.gcube.portal.databook.shared.ex.FeedTypeNotFoundException;
|
||||||
|
import org.gcube.portal.databook.shared.ex.PrivacyLevelTypeNotFoundException;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import com.netflix.astyanax.connectionpool.exceptions.ConnectionException;
|
|
||||||
import com.netflix.astyanax.model.ColumnFamily;
|
|
||||||
import com.netflix.astyanax.serializers.StringSerializer;
|
|
||||||
|
|
||||||
public class DatabookCassandraTest {
|
public class DatabookCassandraTest {
|
||||||
private static DBCassandraAstyanaxImpl store;
|
private static DBCassandraAstyanaxImpl store;
|
||||||
|
|
||||||
|
@ -37,6 +35,109 @@ public class DatabookCassandraTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// @Test
|
||||||
|
// public void getHashTagsFilteredByTime() throws PrivacyLevelTypeNotFoundException, FeedTypeNotFoundException, FeedIDNotFoundException, ColumnNameNotFoundException{
|
||||||
|
//
|
||||||
|
// int windowSize = 6; // go back windowSize months
|
||||||
|
//
|
||||||
|
// String vreUnderTest = "/gcube/devsec/devVRE";
|
||||||
|
//
|
||||||
|
// // reference time
|
||||||
|
// Calendar calendar = Calendar.getInstance();
|
||||||
|
// int currentMonth = calendar.get(Calendar.MONTH); // jan = 0, ..... dec = 11
|
||||||
|
// calendar.set(Calendar.MONTH, currentMonth - windowSize); // the year is automatically decreased if needed
|
||||||
|
// SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
|
||||||
|
// System.out.println("Reference time for trending topics is " + format.format(calendar.getTime()));
|
||||||
|
//
|
||||||
|
// Map<String, Integer> res =
|
||||||
|
// store.getVREHashtagsWithOccurrenceFilteredByTime(
|
||||||
|
// vreUnderTest, calendar.getTimeInMillis());
|
||||||
|
//
|
||||||
|
// // find max score inside the list (counter)
|
||||||
|
// int max = 0;
|
||||||
|
// for(Entry<String, Integer> entry : res.entrySet()){
|
||||||
|
//
|
||||||
|
// max = max < entry.getValue() ? entry.getValue() : max;
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // normalize
|
||||||
|
// Map<String, Double> normalized = new HashMap<String, Double>();
|
||||||
|
// for(Entry<String, Integer> entry : res.entrySet()){
|
||||||
|
//
|
||||||
|
// normalized.put(entry.getKey(), (double)entry.getValue() / (double)max);
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // create the weight for each entry as:
|
||||||
|
// // w = 0.6 * normalized_score + 0.4 * freshness
|
||||||
|
// // freshness is evaluated as (window_size - latest_feed_for_hashtag_in_window_month)/window_size
|
||||||
|
// Map<String, Double> scoredList = new HashMap<String, Double>();
|
||||||
|
// for(Entry<String, Integer> entry : res.entrySet()){
|
||||||
|
//
|
||||||
|
// double weight = 0.6 * normalized.get(entry.getKey());
|
||||||
|
//
|
||||||
|
// // retrieve the last feed for this hashtag and locate it into the window
|
||||||
|
// List<Feed> mostRecentFeedForHashtag = store.getVREFeedsByHashtag(vreUnderTest, entry.getKey());
|
||||||
|
//
|
||||||
|
// // retrieve the most recent one among these feeds
|
||||||
|
// Collections.sort(mostRecentFeedForHashtag, Collections.reverseOrder());
|
||||||
|
//
|
||||||
|
// // locate into the window
|
||||||
|
// Calendar locateInWindow = Calendar.getInstance();
|
||||||
|
// locateInWindow.setTimeInMillis(mostRecentFeedForHashtag.get(0).getTime().getTime());
|
||||||
|
//
|
||||||
|
// // get the month
|
||||||
|
// int sub = currentMonth - locateInWindow.get(Calendar.MONTH);
|
||||||
|
// int value = sub >= 0? sub : 12 - Math.abs(sub);
|
||||||
|
// double freshness = (double)(windowSize - value) / (double)(windowSize);
|
||||||
|
// System.out.println("freshness is " + freshness + " because the last feed has month " + locateInWindow.get(Calendar.MONTH));
|
||||||
|
//
|
||||||
|
// weight += 0.4 * freshness;
|
||||||
|
//
|
||||||
|
// scoredList.put(entry.getKey(), weight);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // print sorted
|
||||||
|
// Map<String, Double> scoredListSorted = sortByValue(scoredList);
|
||||||
|
// for(Entry<String, Double> entry : scoredListSorted.entrySet()){
|
||||||
|
//
|
||||||
|
// System.out.println("[hashtag=" + entry.getKey() + " , weight=" + entry.getValue() + "]");
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// public static <K, V extends Comparable<? super V>> Map<K, V>
|
||||||
|
// sortByValue( Map<K, V> map )
|
||||||
|
// {
|
||||||
|
// List<Map.Entry<K, V>> list =
|
||||||
|
// new LinkedList<Map.Entry<K, V>>( map.entrySet() );
|
||||||
|
// Collections.sort( list, new Comparator<Map.Entry<K, V>>()
|
||||||
|
// {
|
||||||
|
// public int compare( Map.Entry<K, V> o1, Map.Entry<K, V> o2 )
|
||||||
|
// {
|
||||||
|
// return (o2.getValue()).compareTo( o1.getValue() );
|
||||||
|
// }
|
||||||
|
// });
|
||||||
|
//
|
||||||
|
// Map<K, V> result = new LinkedHashMap<K, V>();
|
||||||
|
// for (Map.Entry<K, V> entry : list)
|
||||||
|
// {
|
||||||
|
// result.put( entry.getKey(), entry.getValue() );
|
||||||
|
// }
|
||||||
|
// return result;
|
||||||
|
// }
|
||||||
|
|
||||||
|
|
||||||
|
// @Test
|
||||||
|
// public void getHashTags() throws PrivacyLevelTypeNotFoundException, FeedTypeNotFoundException, FeedIDNotFoundException, ColumnNameNotFoundException{
|
||||||
|
// List<Feed> resList = store.getVREFeedsByHashtag("/gcube/devsec/devVRE", "#test");
|
||||||
|
//
|
||||||
|
// for (Feed feed : resList) {
|
||||||
|
// System.out.println(feed.getTime());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
|
||||||
// @Test
|
// @Test
|
||||||
// public void getComment(){
|
// public void getComment(){
|
||||||
//
|
//
|
||||||
|
|
|
@ -370,6 +370,13 @@ public interface DatabookStore {
|
||||||
* @return a HashMap<String, Integer> of vre Hashtags associated with their occurrence
|
* @return a HashMap<String, Integer> of vre Hashtags associated with their occurrence
|
||||||
*/
|
*/
|
||||||
Map<String, Integer> getVREHashtagsWithOccurrence(String vreid);
|
Map<String, Integer> getVREHashtagsWithOccurrence(String vreid);
|
||||||
|
/**
|
||||||
|
* get a map of vre hashtags, limited to the hashtags still in use at or after the given timestamp, where the key is the hashtag and the value is the occurrence of the hashtag in the VRE
|
||||||
|
* @param vreid vre identifier (scope)
|
||||||
|
* @param timestamp reference time in milliseconds: hashtags whose most recent feed is older than this value are not returned
|
||||||
|
* @return a HashMap<String, Integer> of vre Hashtags associated with their occurrence
|
||||||
|
*/
|
||||||
|
Map<String, Integer> getVREHashtagsWithOccurrenceFilteredByTime(String vreid, long timestamp);
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @param vreid VRE identifier
|
* @param vreid VRE identifier
|
||||||
|
|
Loading…
Reference in New Issue