A simple script to identify an extended group of trolling accounts on Twitter, based on a tweet posted by several accounts.
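Note that the listing depends on four classes from the author's own projects which are not included in the gist: MyOwnTwitterFactory, MultisetMostFrequentFiltering, GraphOperations and EdgeTempBase. A sketch of EdgeTempBase, inferred from its usage, is given after the listing.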
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package net.clementlevallois.queriestwitterdata.controller;
import Twitter.MyOwnTwitterFactory;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;
import net.clementlevallois.datamining.graph.GraphOperations;
import net.clementlevallois.utils.MultisetMostFrequentFiltering;
import twitter4j.Paging;
import twitter4j.Query;
import twitter4j.QueryResult;
import twitter4j.ResponseList;
import twitter4j.Status;
import twitter4j.Twitter;
import twitter4j.TwitterException;
/**
 *
 * @author LEVALLOIS
 */
public class ControllerTwitterQueries {

    /**
     * @param args the command line arguments
     */
    // This script identifies Twitter accounts used to post identical content (these can be accounts used to harass people).
    // It takes as a departure point an extract of a tweet. It first searches for the accounts (set A1) that posted this extract,
    // then it searches for other Twitter accounts (set A2) that posted content identical to the content posted by A1.
    // A1 + A2 can be considered as belonging to the same operation. Once identified with this script, they can all be banned / blocked at once.
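    // Note: the standard Twitter search API used below only indexes roughly the last week of tweets,
    // so the searches should be run while the campaign is still recent.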
    static final AtomicInteger counterLoops = new AtomicInteger();
    public static void main(String[] args) throws TwitterException, IOException {

        final MyOwnTwitterFactory factory = new MyOwnTwitterFactory();
        final Twitter twitter = factory.createOneTwitterInstance();
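        // MyOwnTwitterFactory is a helper from the author's own code (not included in this gist);
        // presumably it loads the API credentials and returns a configured Twitter4J instance.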
        Set<Long> trolls = new HashSet<>();

        // getting all authors who trolled with the same message
        Query query = new Query("put here the text of the tweet posted by multiple accounts");
        QueryResult search = twitter.search(query);
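        // Note: a single search call returns only the first page of results (15 tweets by default;
        // query.setCount(100) would raise the page size), so authors beyond that first page are missed here.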
        List<Status> tweets = search.getTweets();
        Set<String> authors1 = new HashSet<>();
        for (Status status : tweets) {
            System.out.println("author: " + status.getUser().getScreenName());
            authors1.add(status.getUser().getScreenName());
            trolls.add(status.getUser().getId());
        }
        System.out.println("round 1, number of trolls who published the same tweet: " + authors1.size());

        // getting the last 100 tweets from these trolls
        Map<String, Set<Status>> last100StatusesPerAuthor = new HashMap<>();
        Map<String, Set<String>> last100StatusesTextPerAuthor = new HashMap<>();
        for (String author : authors1) {
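            // Paging(1, 100): first page of the author's timeline, up to 100 tweets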
            Paging paging = new Paging(1, 100);
            ResponseList<Status> userTimeline = twitter.getUserTimeline(author, paging);
            Set<Status> last100StatusesForOneAuthor = new HashSet<>();
            Set<String> last100StatusesTextForOneAuthor = new HashSet<>();
            for (Status status : userTimeline) {
                last100StatusesForOneAuthor.add(status);
                last100StatusesTextForOneAuthor.add(status.getText());
            }
            last100StatusesPerAuthor.put(author, last100StatusesForOneAuthor);
            last100StatusesTextPerAuthor.put(author, last100StatusesTextForOneAuthor);
        }

        // getting the tweets that these trolls have in common
        Multiset<String> allLastStatusesTextFromAllAuthors = HashMultiset.create();
        for (Map.Entry<String, Set<String>> entry : last100StatusesTextPerAuthor.entrySet()) {
            allLastStatusesTextFromAllAuthors.addAll(entry.getValue());
        }

        // counting which tweets are published by the most trolls (just counting those tweeted by at least 2 trolls)
        MultisetMostFrequentFiltering filter = new MultisetMostFrequentFiltering();
        List<Multiset.Entry<String>> sortDesckeepMostfrequent = filter.sortDesckeepAboveMinFreq(allLastStatusesTextFromAllAuthors, 1);
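        // MultisetMostFrequentFiltering is another utility from the author's own code (not in this gist);
        // judging from the call, it sorts the multiset entries by count in descending order and keeps
        // those with a count above 1, i.e. texts posted by at least two accounts.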
        // printing these tweets
        System.out.println("-----");
        System.out.println("Tweets most common to trolls:");
        System.out.println("-----");
        for (Multiset.Entry<String> entry : sortDesckeepMostfrequent) {
            System.out.println("count: " + entry.getCount() + " ||| " + entry.getElement());
        }

        // getting the authors who published the same statuses
        Set<String> authors2 = new HashSet<>();
        for (Multiset.Entry<String> entry : sortDesckeepMostfrequent) {
            query = new Query(entry.getElement());
            search = twitter.search(query);
            tweets = search.getTweets();
            for (Status status : tweets) {
                if (!authors1.contains(status.getUser().getScreenName())) {
                    System.out.println("author: " + status.getUser().getScreenName());
                    authors2.add(status.getUser().getScreenName());
                    Set<String> set = new HashSet<>();
                    set.add(status.getText());
                    last100StatusesTextPerAuthor.put(status.getUser().getScreenName(), set);
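                    // the map is seeded with just this matching tweet for now; the author's fuller timeline is merged in below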
                    trolls.add(status.getUser().getId());
                }
            }
        }
        System.out.println("round 2, new authors discovered: " + authors2.size());

        // getting the last 100 tweets from these new trolls
        for (String author : authors2) {
            if (!authors1.contains(author)) {
                Paging paging = new Paging(1, 100);
                ResponseList<Status> userTimeline = twitter.getUserTimeline(author, paging);
                Set<String> last100StatusesTextForOneAuthor = new HashSet<>();
                for (Status status : userTimeline) {
                    last100StatusesTextForOneAuthor.add(status.getText());
                }
                Set<String> set = new HashSet<>();
                if (last100StatusesTextPerAuthor.containsKey(author)) {
                    set.addAll(last100StatusesTextPerAuthor.get(author));
                }
                set.addAll(last100StatusesTextForOneAuthor);
                last100StatusesTextPerAuthor.put(author, set);
            }
        }
        System.out.println("total authors (1 and 2): " + last100StatusesTextPerAuthor.size());
        // writing the list of troll ids in the format expected by Twitter's feature for importing lists of users to block
        // (TRUNCATE_EXISTING ensures a longer trolls.csv from a previous run is fully overwritten, not partially kept)
        try (BufferedWriter writer = Files.newBufferedWriter(Paths.get("trolls.csv"), StandardCharsets.UTF_8,
                StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
            StringBuilder sb = new StringBuilder();
            for (Long troll : trolls) {
                sb.append(String.valueOf(troll)).append("\n");
            }
            writer.write(sb.toString());
        }
        // creating a visual representation of the network of trolls with Gephi (https://gephi.org)
        Queue<EdgeTempBase> edgesToLoadInGraph = new ConcurrentLinkedQueue<>();
        counterLoops.set(0);
        Set<String> keySet = last100StatusesTextPerAuthor.keySet();
        keySet.parallelStream().forEach(key1 -> {
            Set<String> lists1 = last100StatusesTextPerAuthor.get(key1);
            // AtomicInteger rather than a plain int: the counter is updated from parallel stream threads
            int done = counterLoops.incrementAndGet();
            if (done % 100 == 0) {
                System.out.println("authors left to examine: " + (keySet.size() - done));
            }
            keySet.stream().forEach(key2 -> {
                if (!key1.equals(key2)) {
                    Set<String> lists2 = last100StatusesTextPerAuthor.get(key2);
                    Sets.SetView<String> intersection = Sets.intersection(lists1, lists2);
                    int listsInCommon = intersection.size();
                    if (listsInCommon >= 1) {
                        if (key1 != null && key2 != null) {
                            edgesToLoadInGraph.add(new EdgeTempBase(key1, key2, (double) listsInCommon));
                        }
                    }
                }
            });
        });
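        // Note: each pair of authors is visited in both orders, so every edge is added twice (once per direction);
        // GraphOperations is presumably expected to merge these duplicates into a single undirected edge.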
System.out.println("edges to load in graph: " + edgesToLoadInGraph.size()); | |
GraphOperations graphOps = new GraphOperations(); | |
graphOps.initANewGraph(); | |
for (EdgeTempBase e : edgesToLoadInGraph) { | |
graphOps.createEdge(e.getSource(), e.getTarget(), e.getWeight(), false); | |
} | |
graphOps.loadGraph(false); | |
graphOps.exportToGexfFile("trolls.gexf"); | |
} | |
} |
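The listing references EdgeTempBase, a small holder class from the author's own libraries that is not included in the gist. A minimal sketch, inferred purely from the constructor and getters called above (the field names are assumptions), would look like this:

package net.clementlevallois.queriestwitterdata.controller;

// Minimal sketch of the EdgeTempBase helper, reconstructed from its usage in the script above.
// The real class lives in the author's own code; only the constructor and the three getters are attested.
public class EdgeTempBase {

    private final String source;
    private final String target;
    private final double weight;

    public EdgeTempBase(String source, String target, double weight) {
        this.source = source;
        this.target = target;
        this.weight = weight;
    }

    public String getSource() {
        return source;
    }

    public String getTarget() {
        return target;
    }

    public double getWeight() {
        return weight;
    }
}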