Skip to content

Instantly share code, notes, and snippets.

@WebDragon
Created August 30, 2019 18:10
Show Gist options
  • Select an option

  • Save WebDragon/d0db898acb41eefaa8deb84921a77d42 to your computer and use it in GitHub Desktop.

Select an option

Save WebDragon/d0db898acb41eefaa8deb84921a77d42 to your computer and use it in GitHub Desktop.
Custom Perl code to track differences between CiviCRM template overrides and their originating counterparts in revision control (both git and the earlier svn repositories)
#!/usr/bin/perl
use warnings;
use strict;
use v5.28;
### Customizations
#
# Root of the working area. The script expects custom/, civicrm-svn/ and
# civicrm-core/ to live directly underneath it.
my $custom_path = "/home/webdragon/Downloads/hccpa/_files/customization";
# Helper program run once per tracked file to export its revisions.
my $allversions = "$custom_path/git-export-all-versions";
# Storable cache of the %custom_files results; when it exists the scan is skipped.
my $storfile = "$custom_path/_tracking.stor";
# Directory scanned for exported revisions and emptied after each file.
# NOTE(review): presumably this is where git-export-all-versions writes -- confirm.
my $tmpdir = "/tmp/all_versions_exported";
my @diffargs = qw{ -w -B }; # args to /usr/bin/diff
my $visual_diff = "/usr/bin/gvimdiff";
my $visual_diff_switches = "-f"; # don't fork gvimdiff; remain in foreground
# Destination of the --patch output (custom/, vendor/ and manifest.yaml).
my $patch_dir = "$custom_path/patch";
# regular expression matching files we know about and want to keep that would
# be otherwise marked as missing from core and likely discarded
## ajax.php is used by the Main.extra.tpl files so keep that
## Main.extra.tpl appends automatically to Main.tpl in the overrides so keep these
# adjust to suit your tastes
my $knownfiles = qr/(?:ajax\.php|\.extra\.tpl)$/;
## 8< ---- no further customization options ---- >8 ##

# Fail early when the environment is incomplete. -f, -d and -x already imply
# existence, so no separate -e test is needed.
die ("cannot find local copy of git-export-all-versions") unless -f $allversions;
die ("Cannot find customization directory: $custom_path") unless -d "$custom_path/custom/";
die ("Cannot find or execute visual diff program: '$visual_diff'") unless -x $visual_diff;

# Quoted on purpose: a bare 1.2.1 is a v-string literal, i.e. the three
# characters chr(1).chr(2).chr(1), not the text "1.2.1".
our $VERSION = '1.2.1';
use constant TRUE => 1;
use constant FALSE => 0;
# Command-line flags, filled in by GetOptions.
my( $_DEBUG, $_DIFF, $_DIFF3, $_PATCH );
### Modules
#
use Getopt::Long;
# Each switch is a plain boolean flag stored in the matching file-level
# variable; an unrecognised switch aborts the run.
my @option_spec = (
    debug => \$_DEBUG,
    diff  => \$_DIFF,
    diff3 => \$_DIFF3,
    patch => \$_PATCH,
);
unless ( GetOptions(@option_spec) ) {
    die("error in commandline arguments\n");
}
use Storable qw{ store retrieve };
use Carp qw{ croak };
use File::Find::Rule;
use File::chdir;
use Data::Dumper;
$Data::Dumper::Sortkeys = TRUE;
#use Text::Diff; # insufficient support for -w (ignore all whitespace-only differences) or -B (ignore blank lines) as opposed to commandline diff
use List::Util qw{ reduce };
use File::Basename qw{ basename dirname };
use IPC::System::Simple qw{ capturex systemx };
use Try::Tiny;
use File::Temp qw{ tempfile };
use IO::Prompter;
use File::Path qw{ make_path remove_tree };
### Main
#
# %custom_files maps each custom file path (relative to custom/) to a hash of
# file, file_alt, best_diff, timestamp, tree, diff_count and missing, plus
# branch for every file that was matched against a core revision.
my (%custom_files, $testcount);
# if we've not yet stored the data, retrieve the list of matching diffs
unless ( -e -f $storfile ) {
    # File::chdir: assigning to $CWD changes the working directory
    $CWD = "$custom_path/custom/";

    # True when the given relative path exists in either core checkout.
    my $in_core = sub {
        my $path = shift;
        return 1 if -e "../civicrm-svn/$path";
        return 1 if -e "../civicrm-core/$path";
        return 0;
    };

    foreach my $file ( File::Find::Rule->file()->in("./CRM/", "./templates/CRM/") )
    {
        # get altfile for when foo/bar/\d+/baz.tpl is really foo/bar/baz.tpl in core, but is ID'd version of form template for CiviCRM
        my $altfile = $file =~ s![^/]*/(?=[^/]*$)!!r;

        # Prefer the file as is; fall back to altfile. @alt stays empty for a
        # direct match so that file_alt ends up undef in the record.
        my ( $core_name, @alt );
        if ( $in_core->($file) ) {
            $core_name = $file;
        }
        elsif ( $in_core->($altfile) ) {
            $core_name = $altfile;
            @alt       = ($altfile);
        }

        # neither file nor altfile exist in either git repo, mark as missing
        unless ( defined $core_name ) {
            die "ERROR: $file is already defined in hash %custom_files\n" if ( defined $custom_files{$file} );
            $custom_files{$file} = { file => $file, missing => TRUE, file_alt => undef, best_diff => undef, timestamp => undef, diff_count => undef, tree => undef };
            warn( "NO MATCH FOR =>\t$file\n\n" ) if $_DEBUG;
            next;
        }

        # export every revision of the core file, then find the closest one
        # to the custom file (always diff the custom file itself)
        getversions($core_name);
        my $ref = results( $file, diffallversions($file), @alt );
        # "$ref->{file}" interpolates the hash element; the "${ref->{...}}"
        # spelling would be parsed as a block calling the builtin ref().
        die "ERROR: $ref->{file} is already defined in hash %custom_files\n" if ( defined $custom_files{ $ref->{file} } );
        $custom_files{ $ref->{file} } = $ref;
        unlink glob "'$tmpdir/*.*'" or warn("There was a problem removing the temporary files in $tmpdir: $!\n\n");
    }

    # find and make note of the specific git name-rev for this file's SHA1
    foreach my $key ( sort keys %custom_files ) {
        next if $custom_files{$key}{missing};
        my $branch;
        # run git inside the repository the best match came from
        $CWD = "$custom_path/$custom_files{$key}{tree}";
        try {
            $branch = capturex("git", "name-rev", "--name-only", $custom_files{$key}{best_diff});
        }
        catch {
            die "could not execute 'git name-rev --name-only $custom_files{$key}{best_diff}' in $CWD: $_";
        };
        chomp($branch);
        $custom_files{$key}{branch} = $branch;
    }
    store( \%custom_files, $storfile ) or die( "cannot store \%custom_files in $storfile!" );
}
else { #we already have our difflist, so save some time and disk thrash
    # Check the reference retrieve() returns rather than the hash assignment:
    # testing the assignment would reject a valid but empty cache and would
    # never see an undef return.
    my $stored = retrieve( $storfile ) or die( "unable to retrieve $storfile to memory" );
    %custom_files = %{$stored};
}
END {
    ### Output
    #
    # Reporting phase, run when the interpreter shuts down: print the summary
    # report on STDOUT, then optionally walk through visual diffs (--diff /
    # --diff3) and/or build the patch directory (--patch).
    say STDERR "final hash:", Dumper ( \%custom_files ) if $_DEBUG;

    # Files without a core counterpart are split into known extras (matching
    # $knownfiles) and genuinely missing ones.
    my (@missing, @other);
    foreach my $name ( grep { $custom_files{$_}->{missing} } sort keys %custom_files ) {
        push @{ known_missing($name) ? \@other : \@missing }, $name;
    }

    # True when the given name (or $_ when called without arguments) matches
    # the $knownfiles pattern.
    sub known_missing {
        my $name = @_ ? $_[0] : $_;
        return scalar( $name =~ /$knownfiles/ );
    }

    say "# The following @{[ scalar @missing ]} files are missing suitable counterparts in CiviCRM:\n\n",
        map {" * $_\n"} @missing;
    say "# The following @{[ scalar @other ]} files are known extra files and should be retained:\n\n",
        map {" * $_\n"} @other;

    # diff_count is undef for missing files, so default it away from the
    # value being tested in each grep.
    my @unchanged = grep { ( $custom_files{$_}->{diff_count} // 1 ) == 0 } sort keys %custom_files;
    say "# The following @{[ scalar @unchanged ]} files have a matching version in core with zero (or only whitespace and newlines) changes:\n\n",
        map{ " * $_\n"} @unchanged;
    my @changed = grep { ( $custom_files{$_}->{diff_count} // 0 ) > 0 } sort keys %custom_files;
    say "# The following @{[ scalar @changed ]} files have differences with CiviCRM core originals in the manner of the following:\n";
    {
        # file_alt is undef for direct matches; print it as empty
        no warnings 'uninitialized';
        foreach my $entry ( @custom_files{ @changed } ) {
            say <<"OUT";
* Files: custom => $entry->{file}, orig => $entry->{file_alt}
diff count: $entry->{diff_count} character(s)
original source: $entry->{tree}
best diff: $entry->{branch} ( $entry->{best_diff} )
OUT
        }
    }

    ### Output
    #
    # walk through visual diff of all changed files we can track
    if ($_DIFF or $_DIFF3) {
        my %tmpopts = ( UNLINK => 1, TMPDIR => 1 );
        foreach my $entry ( @custom_files{ @changed } ) {
            # always use the right core file to diff against, i.e. not the ID'd form templates but their parents
            my $corefile = defined $entry->{file_alt} ? $entry->{file_alt} : $entry->{file};
            my $count = $entry->{diff_count};
            # allow for a graceful early exit if you get bored
            my $diffthis = prompt( "View diff of '$corefile' ($count differences)? [Y]es, (N)o, (Q)uit", -1, -keyletters, -out=> *STDERR );
            if ( $diffthis =~ /N/i ) { next; } elsif ( $diffthis =~ /Q/i ) { last; }

            # set up tempfiles for this diff
            my ($fh1, $t1) = tempfile( "civicrm_orig_XXXXX", %tmpopts );
            my ($fh2, $t2) = tempfile( "civicrm_custom_XXXXX", %tmpopts );

            # populate the tempfiles with the requisite content; each handle is
            # closed so the content is on disk before the viewer opens it
            $CWD = "$custom_path/$entry->{tree}"; # File::chdir again
            print {$fh1} capturex( "git", "show", "$entry->{branch}:$corefile" );
            close $fh1 or die "cannot close $t1: $!";
            $CWD = "$custom_path/custom";
            print {$fh2} capturex( "cat", $entry->{file} );
            close $fh2 or die "cannot close $t2: $!";

            my @difffiles = ( $t1, $t2 );
            # the third pane (current master of civicrm-core) is only needed for --diff3
            if ($_DIFF3) {
                my ($fh3, $t3) = tempfile( "civicrm_core_XXXXX", %tmpopts );
                $CWD = "$custom_path/civicrm-core";
                print {$fh3} capturex( "git", "show", "master:$corefile" );
                close $fh3 or die "cannot close $t3: $!";
                push @difffiles, $t3;
            }

            say STDERR "testing $entry->{file} against files from civicrm core" if $_DEBUG;
            try {
                systemx( $visual_diff => $visual_diff_switches => @difffiles );
            }
            catch {
                die "could not get a visual diff on @difffiles: $_";
            };
        }
    }# if diff or diff3

    ### Output
    #
    # generate patch/ directory containing only modified files, their original core counterparts,
    # and a manifest containing the list of same for reference and inspection purposes in a cleaner setting
    if ($_PATCH) {
        unless ( -d $patch_dir ) {
            mkdir($patch_dir) or die "cannot create $patch_dir: $!";
        }
        $CWD = $patch_dir; # now in patch dir

        # start from empty vendor/ and custom/ trees on every run
        foreach my $dir ( qw{ vendor custom } ) {
            if ( -e -d $dir ) {
                remove_tree( $dir, { safe => 1, keep_root => 1, error => \my $err });
                handle_err($err);
            }
            else {
                mkdir($dir) or die "cannot create $dir in $CWD: $!";
            }
        }

        my $manifest = 'manifest.yaml';
        open( my $manifest_fh, '>', $manifest ) or die "cannot open $manifest for writing in $CWD: $!";
        print {$manifest_fh} "%YAML 1.2\n---\nFiles:\n";
        foreach my $key ( @changed ) {
            my( $custom, $vendor ) = add_patch_files( $custom_files{$key} );
            my $branch = $custom_files{$key}{branch};
            # link to the repository the match actually came from
            # (civicrm-svn or civicrm-core) instead of always civicrm-svn
            my $tree = $custom_files{$key}{tree};
            print {$manifest_fh} <<"OUT";
- custom: $custom
vendor: $vendor
( https://github.com/civicrm/$tree/blob/$branch/$vendor )
OUT
        }
        print {$manifest_fh} "Extra:\n";
        foreach my $otherfile ( @other ) {
            add_other_file( $otherfile );
            print {$manifest_fh} " - $otherfile\n";
        }
        close $manifest_fh or die "cannot close $manifest file: $!";
    }# if patch
}# END
### Functions
#
# Copy one changed custom file and its closest core original into the patch
# tree. Must be called with the patch directory as the working directory.
#
#   $ref    - a %custom_files record; reads file, file_alt, tree and branch
#   returns - ( custom path, vendor path ), each relative to custom/ and
#             vendor/ respectively
#   croaks  - when a directory or file cannot be created or written
sub add_patch_files {
    my $ref = shift;
    my $c = $ref->{file};
    # the core counterpart keeps its own name when the custom file is an
    # ID'd variant; same defined-ness test the diff phase uses
    my $v = $ref->{file_alt} // $ref->{file};

    make_path( "custom/" . dirname($c), { error => \my $errc } );
    handle_err($errc);
    make_path( "vendor/" . dirname($v), { error => \my $errv } );
    handle_err($errv);

    # open both targets while still inside the patch directory; the paths
    # are relative to it
    open( my $fhc, '>', "custom/$c" ) or croak "cannot open 'custom/$c' for writing: $!";
    open( my $fhv, '>', "vendor/$v" ) or croak "cannot open 'vendor/$v' for writing: $!";

    {
        # File::chdir: the directory change is undone when the block exits
        local $CWD = "$custom_path/custom";
        print {$fhc} capturex( "cat", $c );
    }
    close $fhc or croak "cannot close 'custom/$c': $!";

    {
        # read the original from the repository and revision that matched best
        local $CWD = "$custom_path/$ref->{tree}";
        print {$fhv} capturex( "git", "show", "$ref->{branch}:$v" );
    }
    close $fhv or croak "cannot close 'vendor/$v': $!";

    return ( $c, $v );
}
# Copy one known extra file from custom/ into the patch tree's custom/
# directory. Must be called with the patch directory as the working directory.
#
#   $file   - path relative to custom/
#   returns - 0
#   croaks  - when the directory or file cannot be created or written
sub add_other_file {
    my $file = shift;

    make_path( "custom/" . dirname($file), { error => \my $erro } );
    handle_err($erro);

    # open the target while still inside the patch directory
    open( my $fho, '>', "custom/$file" ) or croak "cannot open 'custom/$file' for writing: $!";
    {
        # File::chdir: the directory change is undone when the block exits
        local $CWD = "$custom_path/custom";
        print {$fho} capturex( "cat", $file );
    }
    close $fho or croak "cannot close 'custom/$file': $!";

    return 0;
}
# Turn the diagnostics collected by File::Path's make_path / remove_tree
# (via their "error" option) into a single exception.
#
#   $err   - undef, or an array ref of single-pair hash refs { path => message };
#            an empty path marks a general error
#   croaks - with one line per diagnostic when any are present; returns
#            nothing otherwise
#
# Every diagnostic is reported, not just the first, and the wording is
# neutral because this is called for both directory creation and removal.
sub handle_err {
    my $err = shift;
    return unless $err && @$err;

    my @problems;
    for my $diag (@$err) {
        my ($file, $message) = %$diag;
        push @problems,
            $file eq ''
            ? "general error: $message"
            : "problem with $file: $message";
    }
    croak join( "\n", @problems ) . "\n";
}
# Run the git-export-all-versions helper for one file inside each core
# checkout (civicrm-svn first, then civicrm-core).
#
#   $testfile - path relative to the root of each checkout
#   dies      - when a checkout directory is absent or the helper fails
#   warns     - and moves on to the next checkout when the file is not there
sub getversions {
    my ($testfile) = @_;

    for my $gitpath ( 'civicrm-svn', 'civicrm-core' ) {
        my $git_dir = "$custom_path/$gitpath/";
        die "could not chdir to $git_dir" unless -e $git_dir;

        # File::chdir: restored automatically at the end of each iteration
        local $CWD = $git_dir;

        if ( !-e $testfile ) {
            warn "### Caution: $testfile not present in /$gitpath/\n\n";
            next;
        }

        try {
            systemx( $allversions, $testfile );
        }
        catch {
            die "could not execute systemx( $allversions $testfile ): $_";
        };
    }
}
# Diff the custom file against every exported revision found under $tmpdir
# and pick the revision whose diff output is shortest.
#
#   $testfile - custom file path, relative to the current working directory
#   returns   - ( path of the best-matching exported file, length in
#               characters of its diff output )
#   dies      - when diff reports trouble, or when there is nothing under
#               $tmpdir to compare against
sub diffallversions {
    my $testfile = shift;
    my %difflist;

    foreach my $corefile ( File::Find::Rule->file()->in($tmpdir) )
    {
        my $diff;
        # diagnostics go to STDERR so they never end up in the report
        say STDERR "diffing $testfile -> $corefile from $CWD \n" if $_DEBUG;
        try {
            # 0 = no differences, 1 = has differences, 2 = trouble so make 0,1 acceptable exit codes.
            $diff = capturex([0,1], "/usr/bin/diff", @diffargs, $corefile, $testfile);
        }
        catch {
            die "Diff failed : $_\n";
        };
        $difflist{$corefile} = length( $diff );
    }

    # Without this guard an empty export directory yields an undefined best
    # match and an obscure failure further down the line.
    die "no exported versions found in $tmpdir to diff $testfile against\n" unless %difflist;

    # Sorting the candidates makes the choice reproducible: among equally
    # short diffs the last path in sort order wins, whatever the hash order.
    my $bestdiff = reduce { $difflist{$a} < $difflist{$b} ? $a : $b } sort keys %difflist;
    say STDERR "Full Difflist for $testfile :\n", Dumper( \%difflist ) if $_DEBUG;
    return ( $bestdiff, $difflist{$bestdiff} );
}
# Build the %custom_files record for a file that was matched against core.
#
#   $orig      - custom file path
#   $bestdiff  - path of the best-matching exported revision; its basename is
#                split on "." and the first three fields are stored as
#                timestamp, best_diff and tree
#   $diffcount - size of the diff against that revision
#   $altfile   - core path when it differs from $orig (optional)
#   returns    - hash ref with the keys file, file_alt, best_diff, timestamp,
#                tree, diff_count and missing (always FALSE here)
sub results {
    my ( $orig, $bestdiff, $diffcount, $altfile ) = @_;

    my $export_name = basename($bestdiff);
    my ( $stamp, $commit, $repo ) = split /\./, $export_name;

    my %record = (
        file       => $orig,
        file_alt   => $altfile,
        best_diff  => $commit,
        timestamp  => $stamp,
        tree       => $repo,
        diff_count => $diffcount,
        missing    => FALSE,
    );
    return \%record;
}
__END__
=pod
=encoding UTF-8
=head1 NAME
track-custom-parent.pl
=head1 SYNOPSIS
    ./track-custom-parent.pl [--debug] [--diff | --diff3] [--patch] >| out.md
=head1 OPTIONS
=over 4
=item C<--diff>
Cause the script to cycle through the list of files that differed from their
core counterparts, and open them in a viewer (gvimdiff, kdiff3, etc) to allow you to better
visualize what the original developer had changed in the custom files (obviously with an
eye towards migrating these changes to current core files).
The program will prompt you on a per-file basis whether you wish to see the diff file, and/or continue.
=item C<--diff3>
Cause the script to do a three-way diff between the best-matching original core file, the custom file, and the current version of that file on the master branch of civicrm-core.
=item C<--patch>
Cause the script to create a patch/ subdirectory to simplify migration. It contains only the changed files from custom/,
copies of their closest-match originals under vendor/, the known extra files, and a manifest.yaml file
listing all of them.
=item C<--debug>
Cause the script to output a slew of diagnostic information about
the diff list between the file being checked and all of the files returned from the
git-export-all-versions script.
=back
=head1 DESCRIPTION
In WWW_ROOT/media/civicrm/ are two directories, custom_php/ and custom_templates/, which
contain overrides for core L<CiviCRM|https://civicrm.org/> functions and templates. Often these are neglected when
updates to the CiviCRM core are performed on the server, as no one noticed anything breaking,
unless you had a developer who kept on top of such things.
Arriving at a badly outdated installation, after successfully updating both the core CMS (Joomla in my case),
and the CiviCRM install, the work was still not done, as the templating overrides still needed to be brought
into sync with the new CiviCRM core version. But how do you know what the original developer changed?
That is what this program attempts to aid in answering.
Create a custom/ directory and within it put the contents of custom_php/ and custom_templates/,
omitting the top level directory.
Then download the civicrm-svn from github to the same parent directory as custom/ above:
git clone https://github.com/civicrm/civicrm-svn.git
Then download the civicrm-core from github, again to the same parent directory as custom/:
git clone https://github.com/civicrm/civicrm-core.git
This should result in the following file tree in your current directory:
=begin text
.
├── civicrm-core/
│   ├── * etc, all files for CiviCRM > 4.2
├── civicrm-svn/
│   ├── * etc, all files for CiviCRM 4.2 and earlier
├── custom/
│   ├── CRM/ (the contents of media/civicrm/custom_php/)
│   └── templates/ (the contents of media/civicrm/custom_templates/)
│       └── CRM/
├── git-export-all-versions (accompanying program)
├── track-custom-parent.pl (this program)
=end text
Running ./track-custom-parent.pl will now look at all files in custom/ and attempt to find
a corresponding match in CiviCRM{svn,core} for that file.
If it is able to find a match, it will attempt to extract all revisions of that file to a
/tmp directory, and then compare your custom version with each one, to find the revision with
the *fewest* number of differences, and mark that revision for later, repeating this process
for all files in custom/.
At the end it reports on its discoveries, optionally allowing you to view the file differences
in the viewer of your choice in a side by side format.
=head1 CAVEATS
In order to properly display the important differences in gvimdiff the following code should be added to ~/.gvimrc in order to properly
ignore whitespace-only changes, and instances where the only difference is a blank line (one or more newlines added or removed from
the original)
=begin text
set diffopt+=iwhite
set diffexpr=DiffW()
function DiffW()
let opt = ""
if &diffopt =~ "icase"
let opt = opt . "-i "
endif
if &diffopt =~ "iwhite"
let opt = opt . "-w -B " " swapped vim's -b with -w and added -B to ignore blank lines
endif
silent execute "!diff -a --binary " . opt .
\ v:fname_in . " " . v:fname_new . " > " . v:fname_out
endfunction
=end text
=head1 AUTHOR
Scott R. Godin,
L<MAD House Graphics|http://madhousegraphics.com/>
=head1 COPYRIGHT
Copyright (c) 2019 by Scott R. Godin. All rights reserved. This program is free
software; you can redistribute it and/or modify it under the same terms as Perl itself.
=head1 SEE ALSO
git-export-all-versions is based on script provided by Dmitry Shevkoplyas at
http://stackoverflow.com/questions/12850030/git-getting-all-previous-version-of-a-specific-file-folder
which led to https://github.com/truist/settings/blob/master/bin/git_export_all_file_versions which was further
modified for use with this script
=head1 LAST REVISED
Tue May 7 13:25:31 EDT 2019
=cut
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment