Last active
February 26, 2022 11:53
-
-
Save ciencia/f3d02298cdc10823628df46b3a13d42e to your computer and use it in GitHub Desktop.
Grabbers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## INSTALL MEDIAWIKI DATABASE | |
php maintenance/install.php --server="http://www.mynewwiki.net" --dbname=grabber --dbserver="localhost:/mnt/tmpgrab/mysql/mysql.sock" --installdbuser=currentuser --installdbpass=password --lang=es --pass=aaaaa --dbuser=grabber --dbpass=grabber --scriptpath=/ WikiName "Admin" | |
## TRUNCATE TABLES PRIOR TO GRABBER | |
# Open a client on the in-memory server with the wiki database preselected.
# "grabber" is the --dbname given to install.php; without a default database
# every statement below fails with "ERROR 1046: No database selected".
mysql --defaults-file=/etc/my-memory.cnf -p grabber
-- Empty these tables before running the grabber scripts.
-- Intentionally whole-table: TRUNCATE removes every row.
TRUNCATE TABLE user;
TRUNCATE TABLE page;
TRUNCATE TABLE revision;
TRUNCATE TABLE text;
TRUNCATE TABLE job;
TRUNCATE TABLE user_groups;
TRUNCATE TABLE pagelinks;
TRUNCATE TABLE externallinks;
## CREATE EXTERNAL STORAGE | |
# Open a client on the on-disk server with the external-storage database
# preselected (grabber_text1 is the dbname of the 'text1' cluster configured in
# $wgExternalServers). Without a default database the CREATE TABLE below fails
# with "ERROR 1046: No database selected".
mysql --defaults-file=/etc/my-disk.cnf -p grabber_text1
-- Blobs table for external storage.
-- When pasted into the mysql client the /*$wgDBprefix*/ marker is an ordinary
-- SQL comment, so the table is created unprefixed as `blobs`.
CREATE TABLE /*$wgDBprefix*/blobs (
    blob_id   INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
    blob_text LONGBLOB,
    PRIMARY KEY (blob_id)
) ENGINE=InnoDB;
## GRABBERS - HAVE FUN! | |
# Initial text import. Choose the end date and write it down: later runs build on it.
php grabText.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --enddate=20170609155229 \
    --namespaces=0 \
    --username=BOT --password=PASSORD \
    2>&1 | tee "/home/currentuser/outputGrabText$(date +'%y%m%d%H%M%S').log"

# Incremental text update: the start date must be the same timestamp as the previous end date.
php grabNewText.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --startdate=20170609155229 --enddate=20170728094220 \
    --namespaces=0 \
    --username=BOT --password=PASSORD \
    2>&1 | tee "/home/currentuser/outputGrabNewText$(date +'%y%m%d%H%M%S').log"

# Adding more namespaces later: use the highest end date reached by the other grabbers.
php grabText.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --enddate=20170728094220 \
    --namespaces="14|6" \
    --username=BOT --password=PASSORD \
    2>&1 | tee "/home/currentuser/outputGrabText$(date +'%y%m%d%H%M%S').log"

# After that, review --namespaces: it must list every namespace whose text was grabbed before.
php grabNewText.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --startdate=20170728094220 --enddate=20170729073845 \
    --namespaces="0|14|6" \
    --username=BOT --password=PASSORD \
    2>&1 | tee "/home/currentuser/outputGrabNewText$(date +'%y%m%d%H%M%S').log"
# Logs: check the --logtypes filter before running.
php grabLogs.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --end=20170729073845 \
    --logtypes="block|protect|rights|delete|upload|move|import|patrol|merge|suppress|abusefilter|renameuser|newusers" \
    --username=BOT --password=PASSORD \
    2>&1 | tee "/home/currentuser/outputGrabLogs$(date +'%y%m%d%H%M%S').log"

# Files. stderr is merged into the pipe (2>&1) like every other grabber command here,
# so warnings and errors end up in the log file instead of only on the terminal.
php grabFiles.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --wikia \
    --username=BOT --password=PASSORD \
    --enddate=20170727152601 \
    2>&1 | tee /home/currentuser/outputGrabFiles20170727.log
# Incremental runs for files and logs, bounded by a start and an end timestamp.
sudo -u php-fpm-wikidex php grabNewFiles.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --wikia \
    --username=BOT --password=PASSORD \
    --startdate=20170801203002 --enddate=20170802201102 \
    2>&1 | tee "/home/currentuser/outputGrabNewFiles$(date +'%y%m%d%H%M%S').log"

sudo -u php-fpm-wikidex php grabLogs.php \
    --url=http://es.pokemon.wikia.com/api.php \
    --start=20170801203002 --end=20170802201102 \
    --logtypes="block|protect|rights|delete|upload|move|import|patrol|merge|suppress|abusefilter|renameuser|newusers" \
    --username=BOT --password=PASSORD \
    2>&1 | tee "/home/currentuser/outputGrabLogs$(date +'%y%m%d%H%M%S').log"

# Metadata population; may be repeated. Move the database to its final location first,
# otherwise the ramdisk may be too small to hold all the tables!
sudo -u php-fpm php populateUserTable.php
nice sudo -u php-fpm php ../maintenance/rebuildrecentchanges.php \
    --from=20170801203002 --to=20170802201102
nice sudo -u php-fpm php ../maintenance/refreshLinks-mejor.php \
    --changes-since=20170801203002
nice sudo -u php-fpm php ../maintenance/runJobs.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Database settings
# Backend type, server address (host:socket form), database name and account.
$wgDBtype     = 'mysql';
$wgDBserver   = 'localhost:/mnt/tmpgrab/mysql/mysql.sock';
$wgDBname     = 'grabber';
$wgDBuser     = 'grabber';
$wgDBpassword = 'grabber';

# MySQL table options to use during installation or update
$wgDBTableOptions = 'ENGINE=InnoDB, DEFAULT CHARSET=binary';

# Pingback periodically sends basic data about this MediaWiki instance to
# https://www.mediawiki.org/ (shared by the Wikimedia Foundation with MediaWiki
# developers). It is switched off here.
$wgPingback = false;

# Site language code; must be one of the codes listed in ./languages/data/Names.php
$wgLanguageCode = 'es';
# REMEMBER TO SET UP NAMESPACES PROPERLY BEFORE IMPORT!!!
# Extra namespaces: numeric id => name (underscores instead of spaces).
$wgExtraNamespaces[110] = "Forum";
$wgExtraNamespaces[111] = "Forum_talk";
$wgExtraNamespaces[420] = "Layer";
$wgExtraNamespaces[421] = "Layer_talk";
$wgExtraNamespaces[828] = "Module";
$wgExtraNamespaces[829] = "Module_talk";
$wgExtraNamespaces[1200] = "Muro";
$wgExtraNamespaces[1201] = "Hilo";
$wgExtraNamespaces[1202] = "Bienvenida_del_Muro";
$wgExtraNamespaces[2000] = "Subforo";
$wgExtraNamespaces[2001] = "Tema_del_foro";
$wgExtraNamespaces[2002] = "Tema";

# Namespace aliases: alternative name => namespace id.
# Keys use underscores rather than spaces, the same form as the names above:
# MediaWiki compares namespace prefixes in underscore form, so an alias key
# that contains a space is never matched.
$wgNamespaceAliases["Forum"] = 110;
$wgNamespaceAliases["Forum_talk"] = 111;
$wgNamespaceAliases["Message_Wall"] = 1200;
$wgNamespaceAliases["Thread"] = 1201;
$wgNamespaceAliases["Message_Wall_Greeting"] = 1202;
$wgNamespaceAliases["Board"] = 2000;
$wgNamespaceAliases["Board_Thread"] = 2001;
$wgNamespaceAliases["Topic"] = 2002;
$wgNamespaceAliases["Usuaria"] = 2;
$wgNamespaceAliases["Usuario_Discusión"] = 3;
$wgNamespaceAliases["Usuaria_Discusión"] = 3;
# Prevents warnings in maintenance scripts about a missing SERVER_NAME (T172060)
$wgOverrideHostname = 'grabber-wikidex';

# EXTERNAL STORAGE
# One storage type ('DB') and one cluster ('text1') made of a single server,
# reached through the on-disk instance's socket.
$wgExternalStores = [ 'DB' ];
$wgExternalServers = [
    'text1' => [
        [
            'host'     => 'localhost:/run/mysql/mysql-disk.sock',
            'user'     => 'grabber_text1',
            'password' => 'grabber_text1',
            'dbname'   => 'grabber_text1',
            'type'     => 'mysql',
            'load'     => 1,
        ],
    ],
];
$wgDefaultExternalStore = [ 'DB://text1' ];
$wgCompressRevisions = true;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Options passed to all MariaDB clients that read this file
[client]
# password = your_password
# port     = 3308
socket     = /run/mysql/mysql-disk.sock
# protocol = TCP

[mysqld]
# --- Paths and connectivity -------------------------------------------------
datadir  = /home/mysql-disk/mysql
pid-file = /run/mysql/mysql-disk.pid
socket   = /run/mysql/mysql-disk.sock
# port   = 3308
# Local socket only (no TCP port is configured above)
skip-networking

# --- InnoDB -----------------------------------------------------------------
innodb_file_format             = Barracuda
innodb_file_per_table          = ON
innodb_compression_level       = 0
innodb_log_compressed_pages    = OFF
innodb_data_home_dir           = /home/mysql-disk/mysql/ibdata
innodb_data_file_path          = ibdata1:10M:autoextend
innodb_log_group_home_dir      = /home/mysql-disk/mysql/iblog
innodb_buffer_pool_size        = 80M
innodb_log_file_size           = 50M
innodb_log_buffer_size         = 64M
innodb_flush_log_at_trx_commit = 0
innodb_lock_wait_timeout       = 25

# --- Binary logging ---------------------------------------------------------
# Logging must stay off: keep both lines commented out.
# log_bin       = mysql-bin
# binlog_format = mixed

# --- Server identity --------------------------------------------------------
# Required unique id between 1 and 2^32 - 1.
# Defaults to 1 if master-host is not set, but the server will not function
# as a master if it is omitted.
server-id = 1

# --- SQL behaviour and limits -----------------------------------------------
sql_mode           = NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES
max_allowed_packet = 24M
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Options passed to all MariaDB clients that read this file
[client]
# password = your_password
# port     = 3308
socket     = /mnt/tmpgrab/mysql/mysql.sock
# protocol = TCP

[mysqld]
# --- Paths and connectivity -------------------------------------------------
datadir  = /mnt/tmpgrab/mysql
pid-file = /mnt/tmpgrab/mysql/mysql.pid
socket   = /mnt/tmpgrab/mysql/mysql.sock
# port   = 3308
# Local socket only (no TCP port is configured above)
skip-networking

# --- InnoDB -----------------------------------------------------------------
# Newer file format that supports dynamic and compressed row formats.
# With replication these options must be set identically everywhere
# (commenting them out is probably the easiest way).
innodb_file_format             = Barracuda
innodb_file_per_table          = ON
innodb_log_compressed_pages    = OFF
innodb_data_home_dir           = /mnt/tmpgrab/mysql/ibdata
innodb_data_file_path          = ibdata1:10M:autoextend
innodb_log_group_home_dir      = /mnt/tmpgrab/mysql/iblog
innodb_buffer_pool_size        = 50M
innodb_flush_log_at_trx_commit = 0
innodb_lock_wait_timeout       = 25

# --- Binary logging ---------------------------------------------------------
# Logging must stay off: keep both lines commented out.
# log_bin       = mysql-bin
# binlog_format = mixed

# --- Server identity --------------------------------------------------------
# Required unique id between 1 and 2^32 - 1.
# Defaults to 1 if master-host is not set, but the server will not function
# as a master if it is omitted.
server-id = 1

# --- SQL behaviour and limits -----------------------------------------------
sql_mode           = NO_ENGINE_SUBSTITUTION,STRICT_TRANS_TABLES
max_allowed_packet = 24M
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Bootstrap for the on-disk (external storage) server: an administrative
-- account plus the grabber_text1 database and its application account.
--
-- Every statement is re-runnable. The start script replays its setup file on
-- each start while this server's data directory persists, and a plain
-- CREATE USER / CREATE DATABASE would abort the batch with an "already exists"
-- error on the second run. IF NOT EXISTS turns that into a harmless note.
-- NOTE(review): CREATE USER IF NOT EXISTS needs MariaDB 10.1.3+ / MySQL 5.7.6+.
CREATE USER IF NOT EXISTS 'currentuser'@'localhost' IDENTIFIED BY 'password';
GRANT ALL ON *.* TO 'currentuser'@'localhost' WITH GRANT OPTION;

CREATE DATABASE IF NOT EXISTS grabber_text1;
CREATE USER IF NOT EXISTS 'grabber_text1'@'localhost' IDENTIFIED BY 'grabber_text1';
GRANT SELECT, INSERT, UPDATE, DELETE ON grabber_text1.* TO 'grabber_text1'@'localhost';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Bootstrap for the in-memory (tmpfs) server: an administrative account plus
-- the grabber wiki database and its application account.
--
-- Every statement is re-runnable, so replaying this file against a server that
-- was already initialised (for example a second "start" without a "stop" in
-- between) does not abort the batch with an "already exists" error.
-- NOTE(review): CREATE USER IF NOT EXISTS needs MariaDB 10.1.3+ / MySQL 5.7.6+.
CREATE USER IF NOT EXISTS 'currentuser'@'localhost' IDENTIFIED BY 'password';
GRANT ALL ON *.* TO 'currentuser'@'localhost' WITH GRANT OPTION;

CREATE DATABASE IF NOT EXISTS grabber;
CREATE USER IF NOT EXISTS 'grabber'@'localhost' IDENTIFIED BY 'grabber';
GRANT SELECT, INSERT, UPDATE, DELETE ON grabber.* TO 'grabber'@'localhost';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Settings shared by start() and stop() for the on-disk mysqld instance.
MYSQLDEFAULTS=/etc/my-disk.cnf          # option file handed to every mysql tool
MYSQLBASE=/home/mysql-disk/mysql        # data directory
MYSQLRUN=/run/mysql                     # runtime directory holding the pid file
MYSQLPID="${MYSQLRUN}/mysql-disk.pid"   # pid file checked by stop()
# Bring up the on-disk mysqld instance.
# Creates the runtime directory if needed, initialises the data directory on
# first use, starts mysqld_safe in the background, waits until the server
# answers, then replays ~/setup-mysql-disk.sql.
# Exits the script with status 1 if initialisation fails or the server does not
# come up in time.
function start() {
    # Runtime directory (pid file location), owned by the mysql user.
    if [ ! -d "$MYSQLRUN" ]; then
        mkdir "$MYSQLRUN"
        chown -R mysql:mysql "$MYSQLRUN"
    fi

    # First run only: lay out the data directory and create the system tables.
    if [ ! -d "$MYSQLBASE" ]; then
        echo
        echo "Setup mysql"
        mkdir -p "$MYSQLBASE"
        mkdir "$MYSQLBASE/ibdata"
        mkdir "$MYSQLBASE/iblog"
        chown -R mysql:mysql "$MYSQLBASE"
        mysql_install_db --datadir="$MYSQLBASE" --defaults-file="$MYSQLDEFAULTS" --user=mysql || exit 1
    fi

    echo
    echo "Starting mysqld"
    mysqld_safe --defaults-file="$MYSQLDEFAULTS" --datadir="$MYSQLBASE" &

    # The exit status of a backgrounded command is always 0, so it says nothing
    # about whether the server started. Poll the server instead of sleeping a
    # fixed time, and give up after 30 seconds.
    local attempt=0
    until mysqladmin --defaults-file="$MYSQLDEFAULTS" ping > /dev/null 2>&1; do
        attempt=$((attempt + 1))
        if [ "$attempt" -ge 30 ]; then
            echo "mysqld did not become ready within 30 seconds" >&2
            exit 1
        fi
        sleep 1
    done

    echo
    echo "Creating database and users"
    mysql --defaults-file="$MYSQLDEFAULTS" < ~/setup-mysql-disk.sql
    echo
}
# Shut down the on-disk mysqld instance; does nothing when no pid file exists.
function stop() {
    # No pid file: nothing to stop.
    [ -e "$MYSQLPID" ] || return 0

    echo "Stopping mysqld"
    # Ask the server to shut down, give it a moment, then run killproc against
    # the pid file for the mysqld_safe wrapper.
    mysqladmin --defaults-file="$MYSQLDEFAULTS" shutdown
    sleep 3
    killproc -v -p "$MYSQLPID" /usr/bin/mysqld_safe
}
# Entry point: the first argument selects the action; anything else is an error.
case "$1" in
    start|stop)
        echo
        # The accepted arguments are exactly the names of the two functions.
        "$1"
        echo "Completed."
        ;;
    *)
        echo "Must specify start or stop."
        exit 1
        ;;
esac
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Settings shared by start() and stop() for the tmpfs-backed mysqld instance.
MYSQLDEFAULTS=/etc/my-memory.cnf     # option file handed to every mysql tool
MOUNTPOINT=/mnt/tmpgrab              # where the tmpfs is mounted
MYSQLBASE="${MOUNTPOINT}/mysql"      # data directory inside the tmpfs
MYSQLPID="${MYSQLBASE}/mysql.pid"    # pid file checked by stop()
TMPFSMAXSIZE=500m                    # tmpfs size limit (#256m - presumably an earlier value)
# Bring up the tmpfs-backed mysqld instance.
# Creates and mounts the tmpfs if needed, initialises the data directory on
# first use, starts mysqld_safe in the background, waits until the server
# answers, then replays ~/setup-mysql-mem.sql.
# Exits the script with status 1 if any setup step fails or the server does not
# come up in time.
function start() {
    if [ ! -d "$MOUNTPOINT" ]; then
        echo "Creating mount point $MOUNTPOINT"
        mkdir "$MOUNTPOINT" || exit 1
        chown root:root "$MOUNTPOINT"
        chmod 755 "$MOUNTPOINT"
    fi

    # mountpoint -q tests, silently, whether the directory itself is a mount
    # point; mount the tmpfs only when it is not mounted yet.
    if ! mountpoint -q "$MOUNTPOINT"; then
        echo "Mounting tmpfs $MOUNTPOINT"
        mount -t tmpfs -o size="${TMPFSMAXSIZE}" tmpfs "$MOUNTPOINT" || exit 1
    fi

    # Fresh tmpfs: lay out the data directory and create the system tables.
    if [ ! -d "$MYSQLBASE" ]; then
        echo
        echo "Setup mysql"
        mkdir "$MYSQLBASE"
        mkdir "$MYSQLBASE/ibdata"
        mkdir "$MYSQLBASE/iblog"
        chown -R mysql:mysql "$MYSQLBASE"
        mysql_install_db --datadir="$MYSQLBASE" --defaults-file="$MYSQLDEFAULTS" --user=mysql || exit 1
    fi

    echo
    echo "Starting mysqld"
    mysqld_safe --defaults-file="$MYSQLDEFAULTS" --datadir="$MYSQLBASE" &

    # The exit status of a backgrounded command is always 0, so it says nothing
    # about whether the server started. Poll the server instead of sleeping a
    # fixed time, and give up after 30 seconds.
    local attempt=0
    until mysqladmin --defaults-file="$MYSQLDEFAULTS" ping > /dev/null 2>&1; do
        attempt=$((attempt + 1))
        if [ "$attempt" -ge 30 ]; then
            echo "mysqld did not become ready within 30 seconds" >&2
            exit 1
        fi
        sleep 1
    done

    echo
    echo "Creating database and users"
    mysql --defaults-file="$MYSQLDEFAULTS" < ~/setup-mysql-mem.sql
    echo
}
# Shut down the tmpfs-backed mysqld instance, delete its data directory and
# unmount the tmpfs. Safe to call when nothing is running or mounted.
# Exits the script with status 1 if the unmount fails.
function stop() {
    if [ -e "$MYSQLPID" ]; then
        echo "Stopping mysqld"
        # Ask the server to shut down, give it a moment, then run killproc
        # against the pid file for the mysqld_safe wrapper.
        mysqladmin --defaults-file="$MYSQLDEFAULTS" shutdown
        sleep 3
        killproc -v -p "$MYSQLPID" /usr/bin/mysqld_safe
    fi

    if [ -d "$MYSQLBASE" ]; then
        rm -rf "$MYSQLBASE"
    fi

    # The mount-point directory outlives the mount, so test for an actual mount
    # rather than for the directory: otherwise a repeated "stop" would call
    # umount on an unmounted directory and exit with an error.
    if mountpoint -q "$MOUNTPOINT"; then
        echo "Unmounting $MOUNTPOINT"
        umount "$MOUNTPOINT" || exit 1
    fi
}
# Entry point: the first argument selects the action; anything else is an error.
case "$1" in
    start|stop)
        echo
        # The accepted arguments are exactly the names of the two functions.
        "$1"
        echo "Completed."
        ;;
    *)
        echo "Must specify start or stop."
        exit 1
        ;;
esac
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment