#!/usr/bin/env python
# -*- coding: utf-8 -*-
# CentralAuth broken accounts detector script
# Released under GPL v2 / MIT License
#
# By Legoktm, with contributions from Roan Kattouw, wctaiwan and Earwig
#
# Detects "broken" accounts. See 
# <https://bugzilla.wikimedia.org/show_bug.cgi?id=61876>
#
# Dependencies: python-mysqldb package
#
# Setup:
#  1. Create a ~/sul.my.cnf with username/password/hostname of a database
#     server which has a copy of all SUL databases on it
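#  2. Create a file named "wikis.csv" in your home directory (~/wikis.csv)
#     listing the database names of all SUL-connected wikis, one per line.
#  3. Results are appended to ~/sul/broken-accounts.tsv as tab-separated
#     "user name <TAB> database name" lines.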

import bisect
import calendar
import datetime
from collections import defaultdict, OrderedDict
import os
import time
import MySQLdb
import MySQLdb.cursors


# Taken from pywikibot
class Timestamp(datetime.datetime):

    """A datetime.datetime that reads and writes MediaWiki timestamps.

    Because this subclasses datetime.datetime, every datetime method and
    operator is available.  Adding or subtracting a datetime.timedelta
    yields a Timestamp again; subtracting another datetime yields a plain
    datetime.timedelta.

    Build instances from MediaWiki strings with Timestamp.fromISOformat()
    ("2014-03-04T05:06:07Z") or Timestamp.fromtimestampformat()
    ("20140304050607").

    """
    mediawikiTSFormat = "%Y%m%d%H%M%S"
    ISO8601Format = "%Y-%m-%dT%H:%M:%SZ"

    @classmethod
    def fromISOformat(cls, ts):
        """Parse an ISO 8601 string (ISO8601Format) into a Timestamp."""
        return cls.strptime(ts, cls.ISO8601Format)

    @classmethod
    def fromtimestampformat(cls, ts):
        """Parse a MediaWiki internal timestamp (mediawikiTSFormat)."""
        return cls.strptime(ts, cls.mediawikiTSFormat)

    def toISOformat(self):
        """Render this Timestamp as an ISO 8601 string."""
        return self.strftime(self.ISO8601Format)

    def totimestampformat(self):
        """Render this Timestamp in the MediaWiki internal format."""
        return self.strftime(self.mediawikiTSFormat)

    def __str__(self):
        """Return the ISO 8601 form, which is what the API understands."""
        return self.toISOformat()

    # Not from pywikibot; the approach comes from
    # http://ruslanspivak.com/2011/07/20/how-to-convert-python-utc-datetime-object-to-unix-timestamp/
    def to_unix(self):
        """Return the Unix epoch seconds for this Timestamp's UTC time tuple."""
        utc_fields = self.utctimetuple()
        return calendar.timegm(utc_fields)

    @staticmethod
    def _rewrap(value):
        """Turn a datetime result into a Timestamp; pass anything else through."""
        if not isinstance(value, datetime.datetime):
            # e.g. a timedelta from datetime - datetime, or NotImplemented.
            return value
        return Timestamp(value.year, value.month, value.day, value.hour,
                         value.minute, value.second, value.microsecond,
                         value.tzinfo)

    def __add__(self, other):
        """Add like datetime does, but keep the result a Timestamp."""
        return Timestamp._rewrap(datetime.datetime.__add__(self, other))

    def __sub__(self, other):
        """Subtract like datetime does, but keep datetime results a Timestamp."""
        return Timestamp._rewrap(datetime.datetime.__sub__(self, other))


class SULAuditer(object):
    """Scan every SUL-connected wiki for "broken" local accounts.

    An account is reported when the wiki's ``user`` row has a matching
    ``centralauth.globaluser`` name, has no ``centralauth.localuser`` row
    for that wiki, and has both an empty password and an empty e-mail.
    Each hit is appended to ``self.fname`` as ``user_name<TAB>dbname``.

    Subclasses ``object`` so that ``property`` is fully supported on
    Python 2 as well.
    """

    def get_db(self, dbname):
        """
        Get a (possibly already open) connection to a database

        Connections are created lazily from ~/sul.my.cnf and cached per
        database name in ``self.db_cache``.
        """
        if dbname not in self.db_cache:
            self.db_cache[dbname] = MySQLdb.connect(
                db=dbname,
                read_default_file=os.path.expanduser('~/sul.my.cnf'),
                cursorclass=MySQLdb.cursors.DictCursor
            )
        return self.db_cache[dbname]

    def close_db(self, dbname):
        """
        Close the connection if we already opened one

        Does nothing when no connection is cached for ``dbname``.
        """
        db = self.db_cache.pop(dbname, None)
        if db is not None:
            db.close()

    def __init__(self):
        # dbname -> open MySQLdb connection
        self.db_cache = {}
        # Output file; rows are appended, never truncated.
        self.fname = os.path.expanduser('~/sul/broken-accounts.tsv')
        # Unix time at which this auditer was created.
        self.now_utc = Timestamp.utcnow().to_unix()

    @property
    def wikis(self):
        """
        Returns a list of all wikis that are SUL enabled

        The list is read once from ~/wikis.csv (one database name per
        line) and cached on the instance.  Blank lines are skipped and
        surrounding whitespace is stripped so that a stray empty line
        does not turn into a connection attempt to a database named ''.
        """
#        return ['enwikivoyage']  # Uncomment this for fast debugging on a "medium" wiki
        if not hasattr(self, '_wikis'):
            with open(os.path.expanduser('~/wikis.csv')) as f:
                self._wikis = [
                    line.strip() for line in f.read().splitlines()
                    if line.strip()
                ]

        return self._wikis

    def handle_local_user_info(self, dbname, res):
        """
        Takes a set of database results, and processes them

        Appends one ``user_name<TAB>dbname`` line per row to ``self.fname``.
        ``res`` is a sequence of mappings with a 'user_name' key.  The file
        is opened once per batch rather than once per row, and an empty
        batch does not touch (or create) the file.
        """
        if not res:
            return
        with open(self.fname, 'a') as f:
            f.writelines(
                '%s\t%s\n' % (row['user_name'], dbname) for row in res
            )

    def get_bulk_local_user_info(self, dbname, limit=5000, last=''):
        """
        Does a massive SQL query to get some basic info

        Fetches up to ``limit`` broken accounts on ``dbname`` whose
        ``user_id`` is greater than ``last``, records them through
        handle_local_user_info() and prints a progress line.

        Returns a ``(row_count, last_user_id)`` tuple; ``last_user_id`` is
        0 when the batch was empty.  Pass it back as ``last`` to fetch the
        next batch.
        """
        cur = self.get_db(dbname).cursor()
        try:
            started = time.time()
            # An empty/falsy ``last`` means "start from the beginning"; send
            # 0 so the comparison against the integer user_id column does
            # not rely on MySQL coercing '' to a number.
            cur.execute("""
        SELECT
            user_name,
            user_id
        FROM user
        LEFT JOIN centralauth.localuser AS localuser
        ON user.user_name=localuser.lu_name AND lu_wiki=%s
        JOIN centralauth.globaluser AS globaluser
        ON user.user_name=globaluser.gu_name
        WHERE user_id > %s
        AND lu_attached_method IS NULL
        AND user_password = ""
        AND user_email = ""
        AND user_name != "MediaWiki message delivery"
        ORDER BY user_id
        LIMIT %s""", (dbname, last or 0, limit))
            res = cur.fetchall()
            elapsed = time.time() - started
        finally:
            # Release the cursor even when the query fails.
            cur.close()
        self.handle_local_user_info(dbname, res)
        if res:
            last_id = res[-1]['user_id']
        else:
            last_id = 0
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print('%s: Fetched up til %s (%s broken), took %s seconds' % (dbname, last_id, len(res), elapsed))
        return len(res), last_id

    def run(self):
        """
        Audit every wiki in ``self.wikis``, paging through users in batches

        A full batch (``count == limit``) means there may be more rows, so
        the next batch is requested starting after the last user_id seen.
        """
        limit = 5000
        for dbname in self.wikis:
            print('Starting on %s...' % dbname)
            try:
                count, last_id = self.get_bulk_local_user_info(dbname, limit)
                while count == limit:
                    count, last_id = self.get_bulk_local_user_info(dbname, limit, last_id)
            finally:
                # Close our connection whether or not this wiki succeeded.
                self.close_db(dbname)


if __name__ == '__main__':
    # Run the audit only when executed as a script, not when imported.
    SULAuditer().run()