Created
          April 27, 2010 03:15 
        
      - 
      
- 
        Save tmc/380278 to your computer and use it in GitHub Desktop. 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #!/usr/bin/env python | |
| import os | |
| import sys | |
| import subprocess | |
| def simplify_pg_index_line(line): | |
| """ | |
| Given a postgres index file line returns it's simplified form. | |
| Returns None for comments | |
| """ | |
| TYPES_WITHOUT_OWNER = ['CAST'] | |
| parts = line.split() | |
| # bail on empty lines and comments | |
| if not parts or parts[0] == ';': | |
| return '' | |
| meta = {} # we won't actually use this but maybe this'll be handly in the future | |
| meta['size_location_info'] = (parts.pop(0), parts.pop(0)) | |
| meta['type'] = parts.pop(0) | |
| if meta['type'] not in TYPES_WITHOUT_OWNER: | |
| meta['owner'] = parts.pop() | |
| return ' '.join(parts) | |
| def get_index(dump_filename): | |
| """ | |
| Returns the text for the index of a postgres dump file | |
| """ | |
| command = os.environ.get('PG_RESTORE', 'pg_restore') | |
| p = subprocess.Popen([command, '-l', dump_filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
| stdout, stderr = p.communicate() | |
| return stdout.split('\n') | |
| def return_fuzzy_matched_lines(good_lines, text): | |
| result = [] | |
| for line in text: | |
| for gl in good_lines: | |
| if gl in line: | |
| result.append(line) | |
| return result | |
| def generate_uncommon_index_contents(indexA, indexB): | |
| """ | |
| Returns a postgres index file that has lines that are unique to indexB using | |
| fuzzy matching. | |
| """ | |
| simplified_indexA = [simplify_pg_index_line(line) for line in indexA] | |
| simplified_indexB = [simplify_pg_index_line(line) for line in indexB] | |
| unique_to_B = set(simplified_indexB).difference(simplified_indexA) | |
| return '\n'.join(return_fuzzy_matched_lines(unique_to_B, indexB)) | |
| def main(): | |
| if len(sys.argv) != 3: | |
| print """Usage: %s template_dump actual_dump | |
| Generates the needed schema file to restore only the differences between two | |
| postgres schemas. I made this to deal with postgis dumps moving between | |
| machines. The template_dump should come from the source machine not the | |
| destination machine. | |
| Expects two postgres dump files created with 'pg_dump -Fc (databasename) > filename'. | |
| Example workflow: | |
| First generate dump files: | |
| pg_dump -Fc template_postgis > template_postgis.db | |
| pg_dump -Fc some_database > some_database.db | |
| Get them to the local machine (via scp or whatever) then on the local machine: | |
| python pgsql_schema_diff.py template_postgis.db some_database.db > dbcontents | |
| createdb -T template_postgis some_database | |
| pg_restore -L dbcontents -vxO1d some_database some_database.db | |
| """ % sys.argv[0] | |
| sys.exit(1) | |
| print generate_uncommon_index_contents(get_index(sys.argv[1]), get_index(sys.argv[2])) | |
| if __name__ == '__main__': | |
| main() | 
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment