Skip to content

Instantly share code, notes, and snippets.

@seinecle
Created February 10, 2019 21:26
Show Gist options
  • Save seinecle/e5a6349bf3631ce1e4bd401e440687d2 to your computer and use it in GitHub Desktop.
Save seinecle/e5a6349bf3631ce1e4bd401e440687d2 to your computer and use it in GitHub Desktop.
A simple script to identify an extended group of trolling accounts on Twitter, based on tweets posted by several accounts
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package net.clementlevallois.queriestwitterdata.controller;
import Twitter.MyOwnTwitterFactory;

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;

import net.clementlevallois.datamining.graph.GraphOperations;
import net.clementlevallois.utils.MultisetMostFrequentFiltering;

import twitter4j.Paging;
import twitter4j.Query;
import twitter4j.QueryResult;
import twitter4j.ResponseList;
import twitter4j.Status;
import twitter4j.Twitter;
import twitter4j.TwitterException;
/**
*
* @author LEVALLOIS
*/
/**
 * Identifies Twitter accounts used to post identical content (e.g. coordinated
 * trolling / harassment campaigns).
 *
 * <p>Starting from the text of one tweet, the script finds the accounts (set A1)
 * that posted it, then searches for further accounts (set A2) that posted content
 * identical to A1's recent tweets. A1 + A2 can be considered part of the same
 * operation: their ids are written to {@code trolls.csv} (one id per line, the
 * format Twitter's "import a list of users to block" feature expects) and their
 * co-posting network is exported as {@code trolls.gexf} for Gephi
 * (https://gephi.org).
 */
public class ControllerTwitterQueries {

    // Legacy progress counter. The parallel pass below counts with a local
    // AtomicInteger (incrementing a plain static int from parallelStream()
    // would be a data race) and copies the final value here afterwards.
    static int counterLoops;

    /**
     * Runs the whole pipeline: search, timeline harvesting, overlap counting,
     * CSV export and GEXF graph export.
     *
     * @param args the command line arguments (unused)
     * @throws TwitterException if any Twitter API call fails (search, timelines)
     * @throws IOException      if writing trolls.csv fails
     */
    public static void main(String[] args) throws TwitterException, IOException {
        final MyOwnTwitterFactory factory = new MyOwnTwitterFactory();
        final Twitter twitter = factory.createOneTwitterInstance();
        Set<Long> trolls = new HashSet<>();

        // 1. Get all authors (A1) who posted the same message.
        Query query = new Query("put here the text of the tweet posted by multiple accounts");
        QueryResult search = twitter.search(query);
        List<Status> tweets = search.getTweets();
        Set<String> authors1 = new HashSet<>();
        for (Status status : tweets) {
            System.out.println("author: " + status.getUser().getScreenName());
            authors1.add(status.getUser().getScreenName());
            trolls.add(status.getUser().getId());
        }
        System.out.println("round 1, number of trolls who published the same tweet: " + authors1.size());

        // 2. Fetch the last 100 tweets of each A1 account, keyed by screen name.
        // (The original also kept a Map<String, Set<Status>> of the raw Status
        // objects, but it was never read — dropped.)
        Map<String, Set<String>> last100StatusesTextPerAuthor = new HashMap<>();
        for (String author : authors1) {
            Paging paging = new Paging(1, 100);
            ResponseList<Status> userTimeline = twitter.getUserTimeline(author, paging);
            Set<String> last100StatusesTextForOneAuthor = new HashSet<>();
            for (Status status : userTimeline) {
                last100StatusesTextForOneAuthor.add(status.getText());
            }
            last100StatusesTextPerAuthor.put(author, last100StatusesTextForOneAuthor);
        }

        // 3. Pool all texts and keep those published by at least 2 trolls.
        Multiset<String> allLastStatusesTextFromAllAuthors = HashMultiset.create();
        for (Map.Entry<String, Set<String>> entry : last100StatusesTextPerAuthor.entrySet()) {
            allLastStatusesTextFromAllAuthors.addAll(entry.getValue());
        }
        MultisetMostFrequentFiltering filter = new MultisetMostFrequentFiltering();
        List<Multiset.Entry<String>> sortDesckeepMostfrequent =
                filter.sortDesckeepAboveMinFreq(allLastStatusesTextFromAllAuthors, 1);

        System.out.println("-----");
        System.out.println("Tweets most common to trolls:");
        System.out.println("-----");
        for (Multiset.Entry<String> entry : sortDesckeepMostfrequent) {
            System.out.println("count: " + entry.getCount() + " ||| " + entry.getElement());
        }

        // 4. Search for further accounts (A2) that published the same statuses.
        Set<String> authors2 = new HashSet<>();
        for (Multiset.Entry<String> entry : sortDesckeepMostfrequent) {
            query = new Query(entry.getElement());
            search = twitter.search(query);
            tweets = search.getTweets();
            for (Status status : tweets) {
                if (!authors1.contains(status.getUser().getScreenName())) {
                    System.out.println("author: " + status.getUser().getScreenName());
                    authors2.add(status.getUser().getScreenName());
                    Set<String> set = new HashSet<>();
                    set.add(status.getText());
                    last100StatusesTextPerAuthor.put(status.getUser().getScreenName(), set);
                    trolls.add(status.getUser().getId());
                }
            }
        }
        System.out.println("round 2, new authors discovered: " + authors2.size());

        // 5. Fetch the last 100 tweets of each A2 account and merge them with
        //    anything already recorded for that account in step 4.
        for (String author : authors2) {
            if (!authors1.contains(author)) {
                Paging paging = new Paging(1, 100);
                ResponseList<Status> userTimeline = twitter.getUserTimeline(author, paging);
                Set<String> last100StatusesTextForOneAuthor = new HashSet<>();
                for (Status status : userTimeline) {
                    last100StatusesTextForOneAuthor.add(status.getText());
                }
                Set<String> set = new HashSet<>();
                if (last100StatusesTextPerAuthor.containsKey(author)) {
                    set.addAll(last100StatusesTextPerAuthor.get(author));
                }
                set.addAll(last100StatusesTextForOneAuthor);
                last100StatusesTextPerAuthor.put(author, set);
            }
        }
        System.out.println("total authors (1 and 2): " + last100StatusesTextPerAuthor.size());

        // 6. Write the troll ids in the format expected by Twitter's function
        //    to import lists of users to block. try-with-resources guarantees
        //    the writer is closed (and the buffer flushed) even on exception.
        try (BufferedWriter writer = Files.newBufferedWriter(
                Paths.get("trolls.csv"), StandardCharsets.UTF_8, StandardOpenOption.CREATE)) {
            StringBuilder sb = new StringBuilder();
            for (Long troll : trolls) {
                sb.append(troll).append('\n');
            }
            writer.write(sb.toString());
        }

        // 7. Build the co-posting network: one edge per pair of accounts that
        //    share at least one identical tweet, weighted by the overlap size.
        //    The outer loop runs in parallel, so progress is tracked with a
        //    local AtomicInteger instead of the (racy) static int.
        Queue<EdgeTempBase> edgesToLoadInGraph = new ConcurrentLinkedQueue<>();
        final AtomicInteger progress = new AtomicInteger();
        Set<String> keySet = last100StatusesTextPerAuthor.keySet();
        keySet.parallelStream().forEach(key1 -> {
            Set<String> lists1 = last100StatusesTextPerAuthor.get(key1);
            int done = progress.incrementAndGet();
            if (done % 100 == 0) {
                System.out.println("co-members left to examine: " + (keySet.size() - done));
            }
            keySet.stream().forEach(key2 -> {
                if (!key1.equals(key2)) {
                    Set<String> lists2 = last100StatusesTextPerAuthor.get(key2);
                    int tweetsInCommon = Sets.intersection(lists1, lists2).size();
                    // short-circuit && (the original used non-short-circuit &)
                    if (tweetsInCommon >= 1 && key1 != null && key2 != null) {
                        edgesToLoadInGraph.add(new EdgeTempBase(key1, key2, (double) tweetsInCommon));
                    }
                }
            });
        });
        counterLoops = progress.get(); // keep the legacy field in sync

        // 8. Load the edges into Gephi and export the graph.
        System.out.println("edges to load in graph: " + edgesToLoadInGraph.size());
        GraphOperations graphOps = new GraphOperations();
        graphOps.initANewGraph();
        for (EdgeTempBase e : edgesToLoadInGraph) {
            graphOps.createEdge(e.getSource(), e.getTarget(), e.getWeight(), false);
        }
        graphOps.loadGraph(false);
        graphOps.exportToGexfFile("trolls.gexf");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment