Ever had to sort a file alphabetically, only to realize that you'd have to do it manually because every item that needs to be sorted is spread over more than one line? This just happened to me when I exported my Gmail contacts to vCard; it turned out they were sorted by formatted name (FN) instead of name (N). The result was the following script, which takes two patterns and some input, and returns the sorted output. The example printed by ./sort_blocks.py --help is exactly the command needed to re-sort Gmail contacts. I'd love to know if you find any bugs or possible improvements to this script. Enjoy:

 #! /usr/bin/env python
# -*- coding: utf-8 -*-
##
## Copyright (C) 2009 CERN.
##
## Sort any multi-line block text
##
## This file is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License as
## published by the Free Software Foundation; either version 2 of the
## License, or (at your option) any later version.
##
## CDS Invenio is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with CDS Invenio; if not, write to the Free Software Foundation, Inc.,
## 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
"""sort_blocks.py - Multiline sort of standard input

Default syntax:

./sort_blocks.py -b 'pattern' -s 'pattern' < input_file > result_file

Options:
 -v,--verbose    Verbose mode
 -h,--help       Print this message
 -b,--bp         Block pattern (dotall multiline); used to extract blocks
 -s,--sp         Sort pattern (dotall multiline); extracted to sort blocks

Example:

./sort_blocks.py -b 'BEGIN:VCARD.*?END:VCARD\\r\\n' -s '^N:(.*)$' \
< contacts.vcf > contacts2.vcf

Orders vCards in contacts.vcf by name, and puts the results in
contacts2.vcf."""

import getopt
import re
import sys


class Usage(Exception):
    """Raise in case of invalid parameters

    @param msg: Text describing the problem; kept in the ``msg`` attribute
    """

    def __init__(self, msg):
        # Hand the message to Exception as well, so that str(err) and
        # tracebacks show it instead of an empty string.
        Exception.__init__(self, msg)
        self.msg = msg


def _extract_sort_text(sort_pattern, text):
    """Return the part of `text` that it should be ordered by

    @param sort_pattern: Regular expression, searched with DOTALL | MULTILINE
    @param text: Block of text to search in
    @return: The first capture group of the first match; the whole match if
        the pattern has no capture group; '' if there is no match or the
        group did not take part in the match
    """
    match = re.search(sort_pattern, text, re.DOTALL | re.MULTILINE)
    if match is None:
        return ''
    if match.re.groups == 0:
        # group(1) would raise IndexError for a pattern without groups.
        return match.group(0)
    # A group that did not participate yields None, which cannot be
    # ordered against strings on Python 3.
    return match.group(1) or ''


def _compare_pattern(sort_pattern, text1, text2):
    """Function to sort by regex

    @param sort_pattern: Regular expression selecting the text to compare
    @param text1: First block
    @param text2: Second block
    @return: -1, 0 or 1 when the text extracted from `text1` sorts before,
        equal to, or after the text extracted from `text2`
    """
    first = _extract_sort_text(sort_pattern, text1)
    second = _extract_sort_text(sort_pattern, text2)
    # Same result as the Python 2 builtin cmp(), which Python 3 removed.
    return (first > second) - (first < second)
def _block_sort_key(sort_regex, block):
    """Return the text that `block` is ordered by

    @param sort_regex: Compiled regular expression
    @param block: One extracted block of text
    @return: The first capture group of the first match; the whole match if
        the pattern has no capture group; '' if there is no match or the
        group did not take part in the match
    """
    found = sort_regex.search(block)
    if found is None:
        return ''
    if not sort_regex.groups:
        return found.group(0)
    return found.group(1) or ''


def split_and_sort(text, block_pattern, sort_pattern):
    """Split into blocks, sort them, and join them up again

    Text that is not matched by `block_pattern` is not part of any block
    and therefore does not appear in the result. Blocks whose sort text is
    equal keep their original relative order.

    @param text: String of blocks to sort
    @param block_pattern: Regular expression matching one whole block; the
        complete match is used even if the pattern contains capture groups
    @param sort_pattern: Gets a subset of each block to sort by
    @return: The sorted blocks concatenated into one string
    """
    flags = re.DOTALL | re.MULTILINE
    # Compile once: the key is computed once per block rather than twice
    # per comparison.
    sort_regex = re.compile(sort_pattern, flags)
    # finditer + group(0) always yields the full block; findall would hand
    # back only the group contents (or tuples) for patterns with groups.
    text_blocks = [found.group(0)
                   for found in re.finditer(block_pattern, text, flags)]
    text_blocks.sort(key=lambda block: _block_sort_key(sort_regex, block))
    return ''.join(text_blocks)


def main(argv=None):
    """Argument handling

    Reads the text to sort from standard input and prints the sorted
    blocks to standard output.

    @param argv: Argument list including the program name; defaults to
        sys.argv
    @return: 0 on success or after printing the help text, 2 when the
        arguments or patterns are invalid (the reason goes to stderr)
    """
    if argv is None:
        argv = sys.argv

    # Defaults
    block_pattern = ''
    sort_pattern = ''

    try:
        # NOTE(review): the module help text lists -v/--verbose, but that
        # option is not accepted here and is reported as a usage error.
        try:
            opts, args = getopt.getopt(
                argv[1:],
                'hb:s:',
                ['help', 'bp=', 'sp='])
        except getopt.GetoptError as err:
            raise Usage(err.msg)

        for option, value in opts:
            if option in ('-h', '--help'):
                print(__doc__)
                return 0
            elif option in ('-b', '--bp'):
                block_pattern = value
            elif option in ('-s', '--sp'):
                sort_pattern = value
            else:
                raise Usage('Unhandled option %s' % option)

        # Both patterns are mandatory and no positional arguments are taken.
        if block_pattern == '' or sort_pattern == '' or args:
            raise Usage(__doc__)

        text = sys.stdin.read()

        try:
            result = split_and_sort(text, block_pattern, sort_pattern)
        except re.error as err:
            # A malformed regular expression is a user error, not a crash.
            raise Usage('Invalid pattern: %s' % err)
        print(result)
    except Usage as err:
        sys.stderr.write(err.msg + '\n')
        return 2
    return 0


if __name__ == '__main__':
    sys.exit(main())