Skip to content

Instantly share code, notes, and snippets.

@aveuiller
Last active August 24, 2017 13:24
Show Gist options
  • Save aveuiller/0ef5b957758bbecf41c2d8380ca5461d to your computer and use it in GitHub Desktop.
Save aveuiller/0ef5b957758bbecf41c2d8380ca5461d to your computer and use it in GitHub Desktop.
Put together Tandoori's tools to provide an automated way of creating a developer smells summary
afwall ukanth/afwall
AmazeFileManager arpitkh96/AmazeFileManager
andFHEM klassm/andFHEM
android-cache-cleaner Frozen-Developers/android-cache-cleaner
Android-IMSI-Catcher-Detector CellularPrivacy/Android-IMSI-Catcher-Detector
Android-Password-Store zeapo/Android-Password-Store
Android-Remote clementine-player/Android-Remote
andstatus andstatus/andstatus
Anki-Android ankidroid/Anki-Android
AnyMemo helloworld1/AnyMemo
apps-android-wikipedia wikimedia/apps-android-wikipedia
Atomic indrora/Atomic
bankdroid liato/android-bankdroid
berlin-vegan-guide Berlin-Vegan/berlin-vegan-guide
bitcoin-wallet schildbach/bitcoin-wallet
book-catalogue eleybourn/Book-Catalogue
calendar-widget plusonelabs/calendar-widget
CampFahrplan tuxmobil/CampFahrplan
chanu grzegorznittner/chanu
clean-status-bar emmaguy/clean-status-bar
Clover Floens/Clover
connectbot connectbot/connectbot
Conversations siacs/Conversations
Dictionary rdoeffinger/Dictionary
document-viewer SufficientlySecure/document-viewer
duckduckgo duckduckgo/Android
external_jbirdvegas_mGerrit JBirdVegas/external_jbirdvegas_mGerrit
Flym FredJul/Flym
forecastie martykan/forecastie
Gadgetbridge Freeyourgadget/Gadgetbridge
gisapp nextgis/android_gisapp
gobandroid ligi/gobandroid
hn-android manmal/hn-android
k-9 k9mail/k-9
kontalk-androidclient kontalk/androidclient
KISS Neamar/KISS
Kore xbmc/Kore
Lightning-Browser anthonycr/Lightning-Browser
lumicall opentelecoms-org/lumicall
meeting-room futurice/meeting-room-tablet
MiMangaNu raulhaag/MiMangaNu
MozStumbler mozilla/MozStumbler
nextcloud nextcloud/android
news-android-app owncloud/News-Android-App
notepad openintents/notepad
Omni-Notes federicoiosue/Omni-Notes
opacclient opacapp/opacclient
open-event-android fossasia/open-event-android
OpenBikeSharing bparmentier/OpenBikeSharing
openbmap wish7code/openbmap
opentasks dmfs/opentasks
opentraining chaosbastler/opentraining
osmeditor4android MarcusWolschon/osmeditor4android
orWall EthACKdotOrg/orWall
owncloud owncloud/android
ownCloud-SMS-App nerzhul/ownCloud-SMS-App
Pindroid maxpower47/PinDroid
PixelKnot guardianproject/PixelKnot
QuasselDroid sandsmark/QuasselDroid
qksms moezbhatti/qksms
QuickLyric QuickLyric/QuickLyric
RedReader QuantumBadger/RedReader
seadroid haiwen/seadroid
SGit sheimi/SGit
share_to_clipboard tengusw/share_to_clipboard
shoppinglist openintents/shoppinglist
sigfood cody82/sigfood
Silence SilenceIM/Silence
Slide ccrama/Slide
sls tgwizard/sls
spacecowboy-NotePad spacecowboy/NotePad
StockTicker premnirmal/StockTicker
Subsonic daneren2005/Subsonic
swiftp ppareit/swiftp
syncthing syncthing/syncthing-android
Tachiyomi inorichi/tachiyomi
tickmate lordi/tickmate
Timber naman14/Timber
transdroid erickok/transdroid
transdroid-search erickok/transdroid-search
Transistor y20k/transistor
Transportr grote/Transportr
TransportsRennes ybonnel/TransportsRennes
ttrss-reader-fork nilsbraden/ttrss-reader-fork
Vanilla vanilla-music/vanilla
wallabag-android-app wallabag/android-app
Weechat ubergeek42/weechat-android
WifiAnalyzer VREMSoftwareDevelopment/WifiAnalyzer
wigle-wifi-wardriving wiglenet/wigle-wifi-wardriving
wikijourney_app WikiJourney/wikijourney_app
xabber-android redsolution/xabber-android
Yaaic pocmo/Yaaic
#!/usr/bin/env bash
# Shared configuration for the metrics / commit-tagging run.
# The script relies on bash features (`function`, `[[ ]]`), hence the bash shebang.
appsFile="allApps.csv"
outDir=$(realpath output)
inputDir=$(realpath output) # We scan back every existing output dirs to update metrics
dbDir=$(realpath databases)
tmpDir="/tmp/tandoori"
mkdir -p "$tmpDir"
# Absolute like the other paths: compute() changes directory before java is started,
# so a bare "Tandoori.jar" would no longer be found.
tandoori=$(realpath Tandoori.jar)
##
# Call Tandoori for the given application.
#
# Runs the jar named by the global $tandoori with the `query` sub-command
# against "$dbDir/<application name>/databases/graph.db".
#
# $1 - application name
# $2 - Tandoori request type
##
tandooriRequest() {
  # Every expansion is quoted so that paths or names containing spaces stay one argument.
  java -Xss512m -jar "$tandoori" query -r "$2" -db "$dbDir/$1/databases/graph.db" -d true
}
##
# Start the metrics calculation for the given app.
#
# Issues one Tandoori request per metric, in this order:
# ALLNUMMETHODS, COUNTVAR, COUNTINNER, COUNTASYNC, COUNTVIEWS.
#
# $1 - application name
##
retrieveNbMethodsAndClasses() {
  # Assigned without `local`, as before: $appName is read elsewhere in the script.
  appName=$1
  echo "# Computing methods and classes for project $appName"
  local request
  for request in ALLNUMMETHODS COUNTVAR COUNTINNER COUNTASYNC COUNTVIEWS; do
    tandooriRequest "$appName" "$request"
  done
}
##
# Prints 1 if the given word is present in the string,
# prints 0 otherwise.
#
# The word is used as an extended regular expression (so 'fix|debug' works)
# and the match is case-sensitive.
#
# $1 - The word to analyze
# $2 - The string to check
##
isWordPresent() {
  # grep -q gives a yes/no answer; counting matching lines could yield more
  # than 1 for a multi-line string.
  if printf '%s\n' "$2" | grep -qE -- "$1"; then
    echo 1
  else
    echo 0
  fi
}
##
# Append one CSV row with the keyword flags of a commit.
#
# $1 - project identifier written in the first column
# $2 - commit hash
# $3 - full commit message (subject and body)
# $4 - result file to append to
##
_emitCommitRow() {
  local feat fix docs style refactor perf tests chores
  feat=$(isWordPresent 'feat' "$3")
  fix=$(isWordPresent 'fix|debug' "$3")
  docs=$(isWordPresent 'docs|documentation' "$3")
  style=$(isWordPresent 'style' "$3")
  refactor=$(isWordPresent 'refactor' "$3")
  perf=$(isWordPresent 'perf' "$3")
  tests=$(isWordPresent 'test' "$3")
  chores=$(isWordPresent 'chore' "$3")
  echo "$1,$2,$feat,$fix,$docs,$style,$refactor,$perf,$tests,$chores" >> "$4"
}
##
# Retrieve commits and their classification regarding actions
# (fix, refactor, feature, ...)
#
# Clones https://github.com/<path> into /tmp/<name>-git when that directory
# does not exist yet, dumps the history (topological order, oldest first) to
# "<name>-commits" in the current directory and writes one row per commit to
# "<name>-commits-results".
#
# $1 - application name (the global $appName is used when $1 is empty)
# $2 - Application path on GitHub
#
# Returns non-zero when the repository cannot be cloned or read.
##
sortCommitByTypes() {
  local name=${1:-$appName}
  local appPath=$2
  local gitDir="/tmp/$name-git"
  local logFile resultFile
  logFile=$(realpath "$name-commits") || return
  resultFile="$logFile-results"
  # Separates the hash from the message on the first line of each commit.
  local COMMIT_SEPARATOR="أ"
  local commit content
  local currentCommit="" currentContent=""

  echo "project,commit,feat,fix,docs,style,refactor,perf,tests,chores" > "$resultFile"

  if [[ ! -d $gitDir ]]; then
    git clone "https://github.com/$appPath" "$gitDir" || return
  fi
  # git -C avoids changing (and having to restore) the working directory.
  git -C "$gitDir" log --topo-order --reverse \
    --pretty="%H$COMMIT_SEPARATOR%s %B" > "$logFile" || return

  # Reading through a redirection (not a pipe) keeps the loop in the current
  # shell, so the last gathered commit is still known once the loop ends.
  while IFS=$COMMIT_SEPARATOR read -r commit content || [[ -n $commit ]]; do
    if [[ -z $commit && -z $content ]]; then
      echo "Skipping empty line"
    elif [[ -z $content ]]; then
      # No separator on this line: it is a body line of the current commit.
      currentContent="$currentContent $commit"
    else
      if [[ -n $currentCommit ]]; then
        _emitCommitRow "$appPath" "$currentCommit" "$currentContent" "$resultFile"
      fi
      echo "Analyzing new commit $commit"
      currentCommit=$commit
      currentContent=$content
    fi
  done < "$logFile"

  # The final commit has no successor to trigger its output: flush it here.
  if [[ -n $currentCommit ]]; then
    _emitCommitRow "$appPath" "$currentCommit" "$currentContent" "$resultFile"
  fi
}
##
# Retrieve methods and classes,
# then sort commits with keywords.
#
# Works inside "$tmpDir/<name>" and, once both steps ran, replaces
# "$outDir/<name>" with that directory.
#
# $1 - application name
# $2 - Application path on GitHub
#
# Returns non-zero when the name is empty or the work directory is unusable.
##
compute() {
  # Assigned without `local`, as before: $appName is read by sortCommitByTypes.
  appName=$1
  appPath=$2
  # An empty name would make the final path "$outDir/" and the cleanup below
  # would delete every previous result.
  if [[ -z $appName ]]; then
    echo "compute: missing application name, skipping" >&2
    return 1
  fi
  echo "##### Starting process for $appName ####"
  local tmpOutput="$tmpDir/$appName"
  local finalOutput="$outDir/$appName"
  mkdir -p "$tmpOutput" || return
  cd "$tmpOutput" || return
  retrieveNbMethodsAndClasses "$appName"
  sortCommitByTypes "$appName" "$appPath"
  cd - || return
  echo "# Removing previous result and moving new"
  rm -rf -- "${finalOutput:?}"
  mv -- "$tmpOutput" "$finalOutput"
}
if [[ -f $1 ]]; then
  # If we have a file as input argument (csv) we read it and do the given applications
  echo "Using input file $1"
  # The list is read on descriptor 3 so that commands started by compute
  # cannot swallow the remaining lines from stdin.
  while IFS=, read -r -u 3 name path || [[ -n $name ]]; do
    if [[ -z $name && -z $path ]]; then
      echo "Skipping empty line"
    else
      compute "$name" "$path"
    fi
  done 3< "$1"
else
  # If no input is given we scan every available inputs
  for db in "$inputDir"/*; do
    [[ -e $db ]] || continue # unmatched glob: nothing to scan
    appName=$(basename "$db")
    # Exact match on the first column: a substring search would return several
    # lines for names such as "transdroid" / "transdroid-search".
    appPath=$(awk -F, -v app="$appName" '$1 == app { print $2; exit }' "$appsFile")
    compute "$appName" "$appPath"
  done
fi
#!/usr/bin/env bash
# Transform one raw application database into developer metrics.
# The script relies on bash features (`function`, `[[ ]]`), hence the bash shebang.
###### This loop script can be used to launch every analysis
#
##!/usr/bin/env bash
#
#appsFile="apps.csv"
#outDir="output"
#inputDir="databases"
#githubKeyFile="./githubKey"
#
#for db in "$inputDir"/*; do
#  appName=$(basename "$db")
#  appPath=$(grep -- "$appName" "$appsFile")
#  echo "# Computing metrics for project $appName - $appPath"
#  ./devNote.sh -d "$db" -p "$appPath" -k "$(cat "$githubKeyFile")" -o "$appName"
#done
#######
OPTIND=1 # POSIX variable; reset in case getopts has been used previously in the shell.
# Initialize our own variables:
verbose=0
appDB=""
project=""
# Default output directory when -o is not given: a timestamped name without
# spaces, made absolute because later steps change directory before using it.
outputDir=$(realpath "$(date +"%Y-%m-%d_%H-%M-%S")")
githubAPIKey=""
##
# Print the usage message on stderr.
##
show_help() {
  local self
  self=$(basename "$0")
  # A here-document prints the text verbatim, without depending on how `echo`
  # treats -e and backslash escapes.
  cat >&2 <<EOF
$self: Transform the raw application database to developer rating

Usage: $self [-h] [-v] -d inputDatabase -k apiKey -p username/project -o outputDir
-h Print this help message
-v Verbose
-d application database to use as input
-p GitHub identifier of the analyzed project
-k GitHub API key
-o Output directory.

EOF
}
# Parse args
while getopts "hvd:o:p:k:" opt; do
  case "$opt" in
    h)
      show_help
      exit 0
      ;;
    v) verbose=1 ;;
    p) project=$OPTARG ;;
    d) appDB=$(realpath "$OPTARG") ;;
    k) githubAPIKey=$OPTARG ;;
    o) outputDir=$(realpath "$OPTARG") ;;
    *)
      # Unknown option or missing option argument: this is a usage error.
      show_help
      exit 2
      ;;
  esac
done
shift $((OPTIND - 1))
if [[ ${1-} == "--" ]]; then
  shift
fi
# Check mandatory arguments
if [[ -z $appDB ]]; then
  echo "Missing argument: '-d'" >&2
  show_help
  exit 2
fi
if [[ -z $project ]]; then
  echo "Missing argument '-p'" >&2
  show_help
  exit 2
fi
if [[ -z $githubAPIKey ]]; then
  echo "Missing argument '-k'" >&2
  show_help
  exit 2
fi
if [[ -f $outputDir ]]; then
  echo "$outputDir is a file, cannot proceed" >&2
  exit 2
fi
# Parsing done, starting out script
# The jars are looked up next to this script, wherever it is called from.
WORKDIR=$(dirname "$(realpath -s "$0")")
GIT_MINER="$WORKDIR/GitMiner.jar"
METRICS_CALC="$WORKDIR/MetricsCalculator.jar"
TANDOORI="$WORKDIR/Tandoori.jar"
##
# Query Tandoori jar to process the smells for a given application.
# The JVM thread stack size is raised to 512 MB (-Xss512m) for this call.
#
# The query runs with the output directory as working directory and reads
# "<$1>/databases/graph.db".
#
# $1 - The application database to query
# $2 - The output directory
#
# Returns non-zero when the output directory cannot be created or entered.
##
tandooriQuery() {
  local database=$1
  local resultDir=$2
  mkdir -p "$resultDir" || return
  [[ $verbose -eq "0" ]] || echo "## Will use database: $database, file: $(ls "$database/..")"
  # We can't provide an output directory to TANDOORI unfortunately, so it is
  # started from there; the subshell leaves the caller's directory untouched.
  (
    cd "$resultDir" || exit
    java -Xss512m -jar "$TANDOORI" query -r NONFUZZY -db "$database/databases/graph.db" -d true
  )
}
##
# Retrieve the developers associated to the given project
# and serialize the results in a database and a CSV.
#
# GitMiner is started from the output directory and is given
# "<output directory>/database" as database path.
# NOTE(review): presumably GitMiner writes its CSV into the working directory;
# confirm against the jar.
#
# $1 - The project identifier on GitHub (i.e. 'username/project')
# $2 - The GitHub API key
# $3 - The output directory
#
# Returns non-zero when the output directory cannot be created or entered.
##
findDevelopers() {
  local apiUrl="https://api.github.com/repos/$1"
  local devOutputDir=$3
  mkdir -p "$devOutputDir" || return
  [[ $verbose -eq "0" ]] || echo "## Will contact Github API with url: $apiUrl"
  # The subshell keeps the directory change local to the GitMiner call.
  (
    cd "$devOutputDir" || exit
    java -jar "$GIT_MINER" getCommits -l "$apiUrl" -k "$2" -d "$devOutputDir/database"
  )
}
##
# Find the developer associated to each commit in smell files
# and add a column with their ID.
#
# Every row of the developers CSV is read as "hash,name,id,email"; each line
# of the smell files that contains the hash gets ",<id>" appended. The CSV
# header row is processed like any other row, which is how the "id" title is
# added to smell headers containing the header's first field.
#
# $1 - Directory containing smells files
# $2 - File containing developers output
##
addDeveloperRow() {
  local smellsDir=$1
  local csvFile=$2
  local -a smellFiles=("$smellsDir"/*.csv)
  if [[ ! -e ${smellFiles[0]} ]]; then
    echo "addDeveloperRow: no CSV file found in $smellsDir" >&2
    return 0
  fi
  local hash name id email safeId
  while IFS=, read -r hash name id email || [[ -n $hash ]]; do
    # An empty hash would match (and modify) every line of every file.
    [[ -n $hash ]] || continue
    [[ $verbose -eq "0" ]] || echo "## Putting developer $id for commit $hash"
    # Escape the characters that are special in a sed replacement.
    safeId=$(printf '%s' "$id" | sed 's|[\\/&]|\\&|g')
    sed -E -i "s/(.*$hash.*)/\1,$safeId/" "${smellFiles[@]}"
  done < "$csvFile"
}
##
# Analyse the smell results in order to give number of Introduced/Refactored smells per developer.
#
# Thin wrapper: each argument is forwarded to the MetricsCalculator jar under
# the option shown below.
#
# $1 - The directory containing smell results (-d)
# $2 - smell summary output file (-o)
# $3 - GitMiner output containing project commits (-c)
# $4 - GitHub project identifier to set in output (-p)
# $5 - Project logs in correct order (-l)
# $6 - Project database containing Tandoori analysis (-db)
##
metricsCalculation() {
  java -jar "$METRICS_CALC" -d "$1" -o "$2" -c "$3" -p "$4" -l "$5" -db "$6"
}
##
# Clone the project and sort the commits in topological order.
#
# The repository is cloned into /tmp/git/<project> unless that directory
# already exists; the commit hashes are then written to the output file,
# oldest first.
#
# $1 - Project path on GitHub
# $2 - output file
#
# Returns non-zero when the clone or the log extraction fails, instead of
# logging whatever repository the caller happens to be in.
##
topologicalCommitsOrder() {
  local repo=$1
  local logFile
  logFile=$(realpath "$2") || return
  local repoPath="/tmp/git/$repo"
  if [[ ! -d $repoPath ]]; then
    # HTTPS needs no SSH user or key for a public repository.
    git clone "https://github.com/$repo" "$repoPath" || return
  fi
  git -C "$repoPath" log --topo-order --reverse --pretty="%H" > "$logFile"
}
[[ $verbose -eq "0" ]] || echo "## verbose=$verbose, appDB=$appDB, outputDir=$outputDir, project: $project, apiKey=$githubAPIKey, Leftovers: $*"
# NOTE(review): tandooriQuery appends "/databases/graph.db" to its first
# argument again, so the smell query receives the suffix twice while
# metricsCalculation receives it once. Confirm which path is intended.
appDB="$appDB/databases/graph.db"
echo "Using appDB $appDB"

# Step 1: smells detection
smellsDir="$outputDir/smells"
[[ $verbose -eq "0" ]] || echo "## Temporary results for Tandoori will be in $smellsDir"
echo "# Cleaning previous smells"
# Quoted, and guarded against an empty value, since the path derives from user input.
rm -rf -- "${smellsDir:?}"
echo "# Parsing project database to find smells"
tandooriQuery "$appDB" "$smellsDir"

# Step 2: developers, fetched only when no previous result is present
devDir="$outputDir/devs"
devFile="$devDir/COMMITS.csv" # TODO: Can we define a custom output file?
[[ $verbose -eq "0" ]] || echo "## Temporary results for devs will be in $devDir"
if [[ -f $devFile ]]; then
  echo "# Developpers already present in $devFile"
else
  echo "# Retrieving project developers profiles on github"
  findDevelopers "$project" "$githubAPIKey" "$devDir"
fi
echo "# Merging developers with smells files"
addDeveloperRow "$smellsDir" "$devFile"

# Step 3: commit order
echo "# Creating log file with topological order"
timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
# mktemp adds an unpredictable suffix so that two runs within the same second
# cannot collide on the file name.
logFile=$(mktemp "/tmp/commits-$timestamp.XXXXXX") || exit 1
topologicalCommitsOrder "$project" "$logFile"

# Step 4: metrics
echo "# Generating global metrics file"
metricsDir="$outputDir/metrics"
metricsCalculation "$smellsDir" "$metricsDir" "$devFile" "$project" "$logFile" "$appDB"
echo "Done; output can be find in $metricsDir"
#!/usr/bin/python3
# Merge _analyzed_ commits with tagged ones
# Python >3.5 needed
import csv
import os
def isTagged(entry):
    """Tell whether a commit row carries at least one keyword tag.

    ``entry`` is a mapping holding the columns "feat", "fix", "docs", "style",
    "refactor", "perf", "tests" and "chores"; each value must be convertible
    with ``int``. Returns True when the values sum to 1 or more.
    """
    tag_columns = ("feat", "fix", "docs", "style", "refactor", "perf", "tests", "chores")
    return sum(int(entry[column]) for column in tag_columns) >= 1
def sumCommits(path):
    """Sum the smell counters of every commit found in a metrics CSV.

    Rows that are empty or whose first cell is "commitNumber" (the header) are
    skipped. For the others, cell 1 is the commit sha and the cells from
    index 3 onwards are read as repeated (I, R, D) integer triples.

    Returns a dict mapping each sha to
    ``{"I": ..., "R": ..., "D": ..., "R_D": R + D}``.
    """
    commitSmells = {}
    # newline='' is what the csv module expects for files handed to csv.reader.
    with open(path, 'r', newline='') as metricsFile:
        for row in csv.reader(metricsFile):
            # If we hit the first row or an empty one, we skip it
            if len(row) == 0 or row[0] == "commitNumber":
                continue
            # For each smell and dev we have the sequence I,R,D, starting at index 3
            introduction = sum(map(int, row[3::3]))
            refactor = sum(map(int, row[4::3]))
            deletion = sum(map(int, row[5::3]))
            # row[1] is commit sha
            commitSmells[row[1]] = {
                "I": introduction,
                "R": refactor,
                "D": deletion,
                "R_D": refactor + deletion,
            }
    return commitSmells
# Layout of the inputs: output/<project>/<project>-commits-results holds the
# tagged commits, output/<project>/metrics/... holds the smell metrics.
csvPrefix = "output"
tagsSuffix = "-commits-results"
smellsSuffix = "metrics/metrics-perDev-perCommit-perSmell.csv"
# Two result files: every analyzed commit, and only those with at least one tag.
taggedOutput = "completeTags-onlyTagged.csv"
output = "completeTags.csv"
header_line = ["project", "commit", "feat", "fix", "docs", "style", "refactor", "perf", "tests",
               "chores", "I", "R", "D", "R_D"]

# newline="" is what the csv module expects for files handed to its readers/writers.
with open(output, "w", newline="") as outputFile, \
        open(taggedOutput, "w", newline="") as taggedOutputFile:
    writer = csv.DictWriter(outputFile, header_line)
    taggedWriter = csv.DictWriter(taggedOutputFile, header_line)
    writer.writeheader()
    taggedWriter.writeheader()

    # Sorted so that two runs over the same inputs produce identical files.
    for directory in sorted(os.listdir(csvPrefix)):
        tagsPath = os.path.join(csvPrefix, directory, directory + tagsSuffix)
        metricsPath = os.path.join(csvPrefix, directory, smellsSuffix)
        if not (os.path.exists(tagsPath) and os.path.exists(metricsPath)):
            print("Skipping " + directory)
            continue

        commitSmells = sumCommits(metricsPath)

        # Counting occurrences of smells
        with open(tagsPath, 'r', newline="") as tagsfile:
            tags = csv.DictReader(tagsfile)
            nbCommits = 0
            nbTaggedCommits = 0
            for entry in tags:
                sha = entry["commit"]
                # Commits without smell metrics are left out of both files.
                if sha not in commitSmells:
                    continue
                merged = {**entry, **commitSmells[sha]}
                nbCommits += 1
                writer.writerow(merged)
                if isTagged(entry):
                    taggedWriter.writerow(merged)
                    nbTaggedCommits += 1
        print("Project: " + directory + " - " + str(nbCommits) + " commits analyzed, including " + str(nbTaggedCommits) + " tagged")

print("Done")
#!/usr/bin/env bash
# Configuration of the batch run that calls devNote.sh for every application.
# The script relies on bash features (`function`, `[[ ]]`), hence the bash shebang.
appsFile="allApps.csv"
outDir="output"
inputDir="output" # We scan back every existing output dirs to update metrics
dbDir="databases"
githubKeyFile="./githubKey"
tmpDir="/tmp/tandoori"
mkdir -p "$tmpDir"
##
# Start the metrics calculation for the given app.
#
# The previous result "$outDir/<name>" is copied to "$tmpDir/<name>",
# devNote.sh runs on that copy, and the copy then replaces the previous result.
#
# $1 - application name
# $2 - application path on GitHub
#
# Returns non-zero when the name is empty or no new result directory exists.
##
startDevNote() {
  appName=$1
  appPath=$2
  # An empty name would make the final path "$outDir/" and the cleanup below
  # would delete every previous result.
  if [[ -z $appName ]]; then
    echo "startDevNote: missing application name, skipping" >&2
    return 1
  fi
  echo "##### Starting process for $appName ####"
  local tmpOutput="$tmpDir/$appName"
  local finalOutput="$outDir/$appName"
  echo "Re-using metrics and developers to compute new metrics ($finalOutput -> $tmpOutput)"
  # A leftover directory from an interrupted run would make cp nest the copy
  # inside it instead of replacing it.
  rm -rf -- "${tmpOutput:?}"
  if [[ -e $finalOutput ]]; then
    cp -r -- "$finalOutput" "$tmpOutput"
  fi
  echo "# Computing metrics for project $appName - $appPath"
  ./devNote.sh -d "$dbDir/$appName" -p "$appPath" -k "$(cat "$githubKeyFile")" -o "$tmpOutput"
  if [[ ! -d $tmpOutput ]]; then
    echo "startDevNote: no result produced for $appName, keeping previous one" >&2
    return 1
  fi
  echo "# Removing previous result and moving new"
  rm -rf -- "${finalOutput:?}"
  mv -- "$tmpOutput" "$finalOutput"
}
if [[ -f $1 ]]; then
  # If we have a file as input argument (csv) we read it and do the given applications
  echo "Using input file $1"
  # The list is read on descriptor 3 so that commands started by startDevNote
  # cannot swallow the remaining lines from stdin.
  while IFS=, read -r -u 3 name path || [[ -n $name ]]; do
    if [[ -z $name && -z $path ]]; then
      echo "Skipping empty line"
    else
      startDevNote "$name" "$path"
    fi
  done 3< "$1"
else
  # If no input is given we scan every available inputs
  for db in "$inputDir"/*; do
    [[ -e $db ]] || continue # unmatched glob: nothing to scan
    appName=$(basename "$db")
    # Exact match on the first column: a substring search would return several
    # lines for names such as "transdroid" / "transdroid-search".
    appPath=$(awk -F, -v app="$appName" '$1 == app { print $2; exit }' "$appsFile")
    startDevNote "$appName" "$appPath"
  done
fi
# Archive the refreshed results once every application has been processed.
./packResults.sh
#!/bin/sh
# Pack the content of output/ into two timestamped tarballs under packages/:
# a full backup, and a lighter one holding only the metrics and commit files.
#echo "Retrieving results"
#cp -r /run/media/antoine/Maxtor/tandoori-metrics/output/* results/
timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
outDir="packages"
fullTarName="$outDir/results-$timestamp.tgz"
metricsTarName="$outDir/metrics-$timestamp.tgz"
# tar does not create the directory of the archive it writes.
mkdir -p "$outDir"
echo "Saving results backup int $fullTarName"
tar caf "$fullTarName" output/*
echo "Creating metrics tarball $metricsTarName"
tar caf "$metricsTarName" output/*/metrics output/*/*commits*
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment