Last active
October 23, 2018 23:04
-
-
Save esimonetti/669ed488e0e28c093de8b863a9bfa133 to your computer and use it in GitHub Desktop.
The latests up to date project is located here: https://github.com/esimonetti/SugarActivityStreamPurger. CLI script to clean activity streams to keep only the last N months, and keep only activities of records that are still valid. A more aggressive and faster version of this script can be found here: https://gist.github.com/esimonetti/39f65396f…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Enrico Simonetti | |
// enricosimonetti.com | |
// | |
// 2017-12-13 on Sugar 7.9.2.0 | |
// | |
// CLI script to clean activity streams to keep only the last N months, and keep only activities of records that are still valid | |
// | |
// TEST, TEST, TEST AND USE AT YOUR OWN RISK! | |
// | |
// For big systems it is recommended to add the indexes below, and then remove them once completed. It is also recommended to start | |
// with a longer window of time (eg: --keep-months=24 ) to gradually reduce the number of records, and purging completely the soft deleted records from the database | |
// To add indexes on MySQL: | |
// create index idx_act_del_type_entered on activities (deleted, activity_type, date_entered); | |
// create index idx_actuser_del on activities_users (deleted); | |
// create index idx_actuser_actid on activities_users (activity_id); | |
// To remove indexes: | |
// drop index idx_act_del_type_entered on activities; | |
// drop index idx_actuser_del on activities_users; | |
// drop index idx_actuser_actid on activities_users; | |
function usage($error = '') { | |
if (!empty($error)) print(PHP_EOL . 'Error: ' . $error . PHP_EOL); | |
print(' php ' . __FILE__ . ' --instance /full/path [--keep-months=6] [--deep-clean] [--force-db-delete]' . PHP_EOL); | |
exit(1); | |
} | |
// only allow CLI | |
$sapi_type = php_sapi_name(); | |
if (substr($sapi_type, 0, 3) != 'cli') { | |
die(__FILE__ . ' is CLI only.'); | |
} | |
// get command line params | |
$o = getopt('', array('instance:', 'keep-months::', 'deep-clean', 'force-db-delete')); | |
if (!$o) usage(); | |
// find directory | |
if (!empty($o['instance']) && is_dir($o['instance'])) { | |
print('Debug: Entering directory ' . $o['instance'] . PHP_EOL); | |
chdir($o['instance']); | |
} else { | |
chdir(dirname(__FILE__)); | |
} | |
if (!file_exists('config.php') || !file_exists('sugar_version.php')) { | |
usage('The provided directory is not a Sugar system'); | |
} | |
// sugar basic setup | |
define('sugarEntry', true); | |
require_once('include/entryPoint.php'); | |
if (extension_loaded('xdebug')) { | |
echo 'Xdebug is enabled on this system. It is highly recommended to disable Xdebug on PHP CLI before running this script. Xdebug will cause unwanted slowness.'.PHP_EOL; | |
} | |
// temporarily stop xdebug, xhprof and tideways if enabled | |
if (function_exists('xdebug_disable')) { | |
xdebug_disable(); | |
} | |
if (function_exists('xhprof_disable')) { | |
xhprof_disable(); | |
xhprof_sample_disable(); | |
} | |
if (function_exists('tideways_disable')) { | |
tideways_disable(); | |
} | |
if (empty($current_language)) { | |
$current_language = $sugar_config['default_language']; | |
} | |
$app_list_strings = return_app_list_strings_language($current_language); | |
$app_strings = return_application_language($current_language); | |
$mod_strings = return_module_language($current_language, 'Administration'); | |
global $current_user; | |
$current_user = BeanFactory::getBean('Users'); | |
$current_user->getSystemUser(); | |
echo 'Trimming activity streams...' . PHP_EOL; | |
$results = array( | |
'activities' => 0, | |
'activities_users' => 0, | |
'comments' => 0, | |
); | |
$db = DBManagerFactory::getInstance(); | |
// months to keep | |
$months_to_keep = 12; | |
if (!empty($o['keep-months'])) { | |
if ($o['keep-months'] > 0) { | |
$months_to_keep = $o['keep-months']; | |
} | |
if ($o['keep-months'] == 'none') { | |
$months_to_keep = 0; | |
} | |
} | |
echo 'The activity stream records older than ' . $months_to_keep . ' months, will be deleted.' . PHP_EOL; | |
$force_db_delete = false; | |
if (isset($o['force-db-delete'])) { | |
$force_db_delete = true; | |
echo 'The records will be completely REMOVED from the database.' . PHP_EOL; | |
} else { | |
echo 'The records will be soft deleted ( deleted = 1 ).' . PHP_EOL; | |
} | |
$deep_clean = false; | |
if (isset($o['deep-clean'])) { | |
$deep_clean = true; | |
echo 'Deep record by record cleaning is going to be performed.' . PHP_EOL; | |
} | |
$line = readline('Based on the above warnings, do you want to proceed? (Type yes/no and enter)' . PHP_EOL); | |
if (strtolower($line) != 'yes') { | |
die('Script interrupted' . PHP_EOL); | |
} | |
$start_time = microtime(true); | |
// find all the activities items we should not touch as they are related to a comment | |
$activities_to_keep = array(); | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->select(array('parent_id'))->from('comments'); | |
$b1->where("deleted = '0'"); | |
echo 'Executing '.$b1->getSQL().PHP_EOL; | |
$res = $b1->execute(); | |
while ($row = $res->fetch()) { | |
// before adding them to the list, check if the actual activity's parent record is not deleted, otherwise they can go | |
$activity = BeanFactory::getBean('Activities', $row['parent_id']); | |
if (!empty($activity->id) && !empty($activity->parent_id) && !empty($activity->parent_type)) { | |
$bean = BeanFactory::getBean($activity->parent_type, $activity->parent_id); | |
if (!empty($bean->id)) { | |
$activities_to_keep[$row['parent_id']] = $row['parent_id']; | |
} | |
} | |
echo '.'; | |
} | |
echo PHP_EOL; | |
// delete every activities record older than N months, without comments | |
$date_entered_keep = gmdate('Y-m-d H:i:s', mktime(date('H'), date('i'), date('s'), date('m')-$months_to_keep, date('d'),date('Y'))); | |
$act_candidates_for_delete = array(); | |
// get all the activities that are not a user's post, that are too old | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->select(array('id'))->from('activities'); | |
$b1->where("deleted = '0'"); | |
$b1->andWhere('activity_type != '.$b1->createPositionalParameter('post')); | |
$b1->andWhere('date_entered < '.$b1->createPositionalParameter($date_entered_keep)); | |
echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$res = $b1->execute(); | |
while ($row = $res->fetch()) { | |
$act_candidates_for_delete[$row['id']] = $row['id']; | |
echo '.'; | |
} | |
echo PHP_EOL; | |
// now get all the activities that are post, and check if their parent record is deleted | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->select(array('id', 'parent_id', 'parent_type'))->from('activities'); | |
$b1->where("deleted = '0'"); | |
$b1->andWhere('activity_type = '.$b1->createPositionalParameter('post')); | |
echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$res = $b1->execute(); | |
while ($row = $res->fetch()) { | |
if (!empty($row['id']) && !empty($row['parent_id']) && !empty($row['parent_type'])) { | |
// see if i can access the bean | |
$bean = BeanFactory::getBean($row['parent_type'], $row['parent_id']); | |
if (empty($bean->id)) { | |
// can delete | |
$act_candidates_for_delete[$row['id']] = $row['id']; | |
} | |
echo '.'; | |
} | |
} | |
echo PHP_EOL; | |
if (!empty($act_candidates_for_delete)) { | |
foreach ($act_candidates_for_delete as $activity_id) { | |
// if it is not something we need to keep... | |
if (!in_array($activity_id, $activities_to_keep)) { | |
if (!$force_db_delete) { | |
// mark as deleted the matching activities_users | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->update('activities_users'); | |
$b1->set('deleted', '1'); | |
$b1->where('activity_id = ' . $b1->createPositionalParameter($activity_id)); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
// mark as deleted this activity | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->update('activities'); | |
$b1->set('deleted', '1'); | |
$b1->where('id = ' . $b1->createPositionalParameter($activity_id)); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
} else { | |
// delete the matching activities_users | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->delete('activities_users'); | |
$b1->where('activity_id = ' . $b1->createPositionalParameter($activity_id)); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
// delete this activity | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->delete('activities'); | |
$b1->where('id = ' . $b1->createPositionalParameter($activity_id)); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
} | |
$results['activities_users']++; | |
$results['activities']++; | |
echo '..'; | |
} | |
} | |
} | |
echo PHP_EOL; | |
if ($deep_clean) { | |
// find comments without a valid activities record, that can be deleted | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->select(array('com.id'))->from('comments', 'com'); | |
$b1->leftJoin('com', 'activities', 'act', 'com.parent_id = act.id'); | |
$b1->where("com.deleted = '0'"); | |
$b1->andWhere('act.id is null'); | |
echo 'Executing '.$b1->getSQL().PHP_EOL; | |
$res = $b1->execute(); | |
while ($row = $res->fetch()) { | |
if (!$force_db_delete) { | |
// mark as deleted the comments without activities | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->update('comments'); | |
$b1->set('deleted', '1'); | |
$b1->where('id = ' . $b1->createPositionalParameter($row['id'])); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
} else { | |
// delete the comments without activities | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->delete('comments'); | |
$b1->where('id = ' . $b1->createPositionalParameter($row['id'])); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
} | |
$results['comments']++; | |
echo '.'; | |
} | |
echo PHP_EOL; | |
// find activities_users without an activity_id, that can be deleted | |
$act_users_to_delete = array(); | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->select(array('au.id'))->from('activities_users', 'au'); | |
$b1->leftJoin('au', 'activities', 'act', 'au.activity_id = act.id'); | |
$b1->where("au.deleted = '0'"); | |
$b1->andWhere('act.id is null'); | |
echo 'Executing '.$b1->getSQL().PHP_EOL; | |
$res = $b1->execute(); | |
while ($row = $res->fetch()) { | |
$act_users_to_delete[$row['id']] = $row['id']; | |
echo '.'; | |
} | |
echo PHP_EOL; | |
// find activities_users with a deleted parent_type/parent_id, that can be deleted | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->select(array('id', 'parent_id', 'parent_type'))->from('activities_users'); | |
$b1->where("deleted = '0'"); | |
echo 'Executing '.$b1->getSQL().PHP_EOL; | |
$res = $b1->execute(); | |
while ($row = $res->fetch()) { | |
if (!empty($row['id']) && !empty($row['parent_id']) && !empty($row['parent_type'])) { | |
// see if i can access the bean | |
$bean = BeanFactory::getBean($row['parent_type'], $row['parent_id']); | |
if (empty($bean->id)) { | |
// can delete | |
$act_users_to_delete[$row['id']] = $row['id']; | |
} | |
echo '.'; | |
} | |
} | |
echo PHP_EOL; | |
if (!empty($act_users_to_delete)) { | |
foreach ($act_users_to_delete as $act_users_id) { | |
if (!$force_db_delete) { | |
// mark as deleted the matching activities_users | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->update('activities_users'); | |
$b1->set('deleted', '1'); | |
$b1->where('id = ' . $b1->createPositionalParameter($act_users_id)); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
} else { | |
// delete the matching activities_users | |
$b1 = $db->getConnection()->createQueryBuilder(); | |
$b1->delete('activities_users'); | |
$b1->where('id = ' . $b1->createPositionalParameter($act_users_id)); | |
//echo 'Executing '.$b1->getSQL().' '.print_r($b1->getParameters(), true).PHP_EOL; | |
$b1->execute(); | |
} | |
$results['activities_users']++; | |
echo '.'; | |
} | |
} | |
echo PHP_EOL; | |
} | |
echo 'Summary of records removed: ' . PHP_EOL; | |
foreach ($results as $heading => $value) { | |
echo $value . ' ' . $heading . PHP_EOL; | |
} | |
echo 'Note that the activities_users table summary might now be fully accurate, as for every record there can be multiple user activities records deleted' . PHP_EOL; | |
echo PHP_EOL; | |
print('Completed in ' . (int)(microtime(true) - $start_time) . ' seconds.' . PHP_EOL); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment