Skip to content

Instantly share code, notes, and snippets.

@jhejderup
Last active January 13, 2020 11:39
Show Gist options
  • Save jhejderup/795524106c1497f0770940169f3de5ec to your computer and use it in GitHub Desktop.
Save jhejderup/795524106c1497f0770940169f3de5ec to your computer and use it in GitHub Desktop.
GHTorrent extracting dependabot records from the events collection
// Prints one ";"-separated line (created_at;repo;parent sha;filename;patch)
// for every selected pull-request event whose head commit has exactly one
// parent and changes exactly one file with 'pom.xml' in its name.
db.getCollection('events').aggregate([
  // Walk the events in descending _id order.
  { $sort: { _id: -1 } },
  // Keep pull-request events on Java repositories raised by actor 27856297.
  {
    $match: {
      "$and": [
        { "type": { $eq: "PullRequestEvent" } },
        { "payload.pull_request.head.repo.language": { $eq: "Java" } },
        { "actor.id": { $eq: 27856297 } },
      ],
    },
  },
  // Attach the commit(s) whose sha equals the pull request's head sha.
  {
    $lookup: {
      from: "commits",
      localField: "payload.pull_request.head.sha",
      foreignField: "sha",
      as: "head",
    },
  },
  // Drop events for which the lookup did not yield exactly one commit.
  {
    $match: {
      "$and": [
        { "head": { $size: 1 } },
      ],
    },
  },
  // Only the first 20 joined events reach the client-side filter below.
  { $limit: 20 },
]).forEach(function (event) {
  const commit = event.head[0];
  if (commit.parents.length !== 1 || commit.files.length !== 1) {
    return;
  }

  const changed = commit.files[0];
  if (!changed.filename.includes('pom.xml')) {
    return;
  }

  const createdAt = event.payload.pull_request.created_at;
  const parentSha = commit.parents[0].sha;
  print(createdAt + ";" + event.repo.name + ";" + parentSha + ";" + changed.filename + ";" + changed.patch);
});
///
///
///
// Collects pull-request events (Java repositories, actor 27856297) whose head
// commit has exactly one parent and changes a single pom.xml file containing a
// <version> element, then prints the collected records as one JSON array.
//
// `var` is kept on purpose: `results` is declared more than once in this
// file, which `const`/`let` would reject.
var results = [];
db.getCollection('events').aggregate([
  // Walk the events in descending _id order.
  { $sort: { _id: -1 } },
  {
    $match: {
      "$and": [
        { "type": { $eq: "PullRequestEvent" } },
        { "payload.pull_request.head.repo.language": { $eq: "Java" } },
        { "actor.id": { $eq: 27856297 } },
      ],
    },
  },
  // Attach the commit(s) whose sha equals the pull request's head sha.
  {
    $lookup: {
      from: "commits",
      localField: "payload.pull_request.head.sha",
      foreignField: "sha",
      as: "head",
    },
  },
  // Drop events for which the lookup did not yield exactly one commit.
  {
    $match: {
      "$and": [
        { "head": { $size: 1 } },
      ],
    },
  },
  // The limit applies before the client-side filter below, so fewer than 20
  // records may end up in `results`.
  { $limit: 20 },
]).forEach(doc => {
  const commit = doc.head[0];

  // Skip, rather than crash on, commits stored without these arrays.
  if (!Array.isArray(commit.parents) || commit.parents.length !== 1) {
    return;
  }
  if (!Array.isArray(commit.files) || commit.files.length !== 1) {
    return;
  }

  const file = commit.files[0];
  // A file entry may lack `patch` (NOTE(review): GitHub omits it for binary or
  // very large diffs - confirm for this dataset). Calling .includes() on
  // undefined would throw and abort the export, so such records are skipped.
  if (typeof file.filename !== 'string' || typeof file.patch !== 'string') {
    return;
  }
  if (!file.filename.includes('pom.xml')) {
    return;
  }
  if (!file.patch.includes('<version>') || !file.patch.includes('</version>')) {
    return;
  }

  results.push({
    ts: doc.payload.pull_request.created_at,
    slug: doc.repo.name,
    sha: commit.parents[0].sha,
    path: file.filename,
    // Stored as a JSON-escaped string, exactly as before.
    patch: JSON.stringify(file.patch),
  });
});
printjson(results);
///
///
///
// Collects pull-request events (Java repositories, actor 27856297) whose head
// commit has exactly one parent and changes a single pom.xml file containing a
// <version> element. Each record carries the pull request's number, url, title
// and state next to the commit data; the records are printed as one JSON array.
//
// `var` is kept on purpose: `results` is declared more than once in this
// file, which `const`/`let` would reject.
var results = [];
db.getCollection('events').aggregate([
  // Walk the events in descending _id order.
  { $sort: { _id: -1 } },
  {
    $match: {
      "$and": [
        { "type": { $eq: "PullRequestEvent" } },
        { "payload.pull_request.head.repo.language": { $eq: "Java" } },
        { "actor.id": { $eq: 27856297 } },
      ],
    },
  },
  // Attach the commit(s) whose sha equals the pull request's head sha.
  {
    $lookup: {
      from: "commits",
      localField: "payload.pull_request.head.sha",
      foreignField: "sha",
      as: "head",
    },
  },
  // Drop events for which the lookup did not yield exactly one commit.
  {
    $match: {
      "$and": [
        { "head": { $size: 1 } },
      ],
    },
  },
  // The limit applies before the client-side filter below, so fewer than 10
  // records may end up in `results`.
  { $limit: 10 },
]).forEach(doc => {
  const commit = doc.head[0];

  // Skip, rather than crash on, commits stored without these arrays.
  if (!Array.isArray(commit.parents) || commit.parents.length !== 1) {
    return;
  }
  if (!Array.isArray(commit.files) || commit.files.length !== 1) {
    return;
  }

  const file = commit.files[0];
  // A file entry may lack `patch` (NOTE(review): GitHub omits it for binary or
  // very large diffs - confirm for this dataset). Calling .includes() on
  // undefined would throw and abort the export, so such records are skipped.
  if (typeof file.filename !== 'string' || typeof file.patch !== 'string') {
    return;
  }
  if (!file.filename.includes('pom.xml')) {
    return;
  }
  if (!file.patch.includes('<version>') || !file.patch.includes('</version>')) {
    return;
  }

  const pullRequest = doc.payload.pull_request;
  results.push({
    number: pullRequest.number,
    url: pullRequest.url,
    title: pullRequest.title,
    state: pullRequest.state,
    ts: pullRequest.created_at,
    slug: doc.repo.name,
    sha: commit.parents[0].sha,
    path: file.filename,
    // Stored as a JSON-escaped string, exactly as before.
    patch: JSON.stringify(file.patch),
  });
});
printjson(results);
@jhejderup
Copy link
Author

Pipeline explanation:

  1. Sort the "events" collection so that the latest records are examined first
  2. Match records that are of the type "PullRequestEvent" with repo language "Java" and created by "dependabot"
  3. Join with the "commits" collection to get information about file and patch. Dependabot creates only one commit
  4. Check if mongo was able to find the dependabot commit referenced in the pull request
  5. Limit the number of records we want (change this parameter manually)
  6. ForEach clause: make sure there is only one parent commit and only one file changed, then check that the changed file is a pom.xml file
  7. Dump everything as a json file

@jhejderup
Copy link
Author

jhejderup commented Dec 16, 2019

Run on ght:

git clone https://gist.github.com/jhejderup/795524106c1497f0770940169f3de5ec dependabot_mongo_scripts
cd  dependabot_mongo_scripts
screen
time mongo --quiet github < ght_dependabot_json.js >> dependabot_pr.json

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment