--- /dev/null
+#!/usr/bin/env python
+#
+# Copy...
+
+"""Cleanup the LDIF output from abook_ using `python-ldap`_.
+
+.. _abook: http://abook.sourceforge.net/
+.. _python-ldap: http://www.python-ldap.org/
+"""
+
+import re
+import StringIO
+
+import ldif
+
+
def cleanup(text, basedn):
    """Run the whole clean-up pipeline over LDIF text.

    The raw ``text`` is patched so that it can be parsed, every parsed
    record is passed through the record-level fixers, and the result
    is written back out as LDIF.

    :param text: LDIF text to clean up.
    :param basedn: base distinguished name handed to ``add_base_dn``.
    :returns: the rewritten LDIF text, prefixed with ``version: 1``.
    """
    # Text-level fixes, applied before the parser sees the input.
    for fix_text in (remove_trailing_mail, remove_cn_commas):
        text = fix_text(text)

    entries = ldif.ParseLDIF(StringIO.StringIO(text))

    # Record-level fixes.  The order matters: set_postaladdress, for
    # instance, reads the 'localityname' key written by rename_locality.
    record_fixes = (
        remove_empty_mail,
        remove_top_objectclass,
        add_inetorgperson_objectclass,
        lambda recs: add_base_dn(recs, basedn),
        add_names,
        standardize_phone_numbers,
        standardize_country_code,
        rename_locality,
        rename_cellphone,
        rename_xmozillaanyphone,
        rename_xmozillanickname,
        rename_homeurl,
        set_postaladdress,
    )
    for fix_records in record_fixes:
        entries = fix_records(entries)

    # Serialize the records back into LDIF text.
    buf = StringIO.StringIO()
    ldif_writer = ldif.LDIFWriter(buf)
    for dn, attrs in entries:
        ldif_writer.unparse(dn, attrs)
    return 'version: 1\n\n%s' % buf.getvalue()
+
def remove_trailing_mail(text):
    """Fill in a placeholder address wherever a line ends in ``,mail=``.

    >>> print(remove_trailing_mail('\\n'.join([
    ...     'version: 1',
    ...     'dn: cn=John Doe,mail=',
    ...     'cn: John Doe',
    ...     '',
    ...     ])))
    version: 1
    dn: cn=John Doe,mail=x@y.com
    cn: John Doe
    <BLANKLINE>
    """
    # MULTILINE makes '$' match at the end of every line, not only at
    # the end of the whole text.
    empty_mail_at_eol = re.compile(',mail=$', re.MULTILINE)
    return empty_mail_at_eol.sub(',mail=x@y.com', text)
+
def _sub_cn_commas(match):
    """Rebuild a ``cn=...,mail=`` match with the commas inside the
    captured name turned into underscores."""
    name = match.group(1)
    return 'cn=%s,mail=' % '_'.join(name.split(','))


def remove_cn_commas(text):
    """Replace commas in the ``cn=`` part that precedes ``,mail=``.

    Only text matching ``cn=...,mail=`` is rewritten; other lines
    keep their commas.

    >>> print(remove_cn_commas('\\n'.join([
    ...     'version: 1',
    ...     'dn: cn=John, Jane, and Jim Doe,mail=x@y.com',
    ...     'cn: John, Jane, and Jim Doe',
    ...     '',
    ...     ])))
    version: 1
    dn: cn=John_ Jane_ and Jim Doe,mail=x@y.com
    cn: John, Jane, and Jim Doe
    <BLANKLINE>
    """
    cn_before_mail = re.compile('cn=(.*),mail=')
    return cn_before_mail.sub(_sub_cn_commas, text)
+
def remove_empty_mail(records):
    """Drop the ``mail`` attribute from records where it is ``['']``.

    The record dictionaries are modified in place and the same
    ``records`` object is returned.
    """
    for _, attrs in records:
        if attrs.get('mail') == ['']:
            del attrs['mail']
    return records
+
def remove_top_objectclass(records):
    """Remove one ``top`` value from every record's ``objectclass``.

    Only the first occurrence is removed from each list.  The lists
    are modified in place and ``records`` is returned.
    """
    for _, attrs in records:
        classes = attrs['objectclass']
        if 'top' not in classes:
            continue
        classes.remove('top')
    return records
+
def add_inetorgperson_objectclass(records):
    """Append the person-related object classes to every record.

    Each record's ``objectclass`` list is extended in place and
    ``records`` is returned.
    """
    extra_classes = (
        'organizationalPerson',
        'inetOrgPerson',
        'extensibleObject',  # extensibleObject required for countryName
    )
    for _, attrs in records:
        attrs['objectclass'].extend(extra_classes)
    return records
+
def add_base_dn(records, basedn):
    """Replace the ``,mail=...`` tail of every DN with ``basedn``.

    Everything from the first ``,mail=`` to the end of the DN is
    replaced by ``', '`` followed by ``basedn``.  DNs without a
    ``,mail=`` part are left unchanged.

    :param records: list of ``(dn, record)`` pairs; each pair is
        replaced in place by ``(new_dn, record)``.
    :param basedn: base distinguished name, inserted verbatim.
    :returns: the same ``records`` list.
    """
    mail_tail = re.compile(',mail=.*')
    suffix = ', ' + basedn

    def literal_suffix(match):
        # A callable replacement is inserted as-is.  Passing the string
        # itself would make the regexp engine interpret backslashes in
        # ``basedn`` (e.g. a DN escape such as \2C) as template escapes
        # or group references.
        return suffix

    for i, (dn, record) in enumerate(records):
        records[i] = (mail_tail.sub(literal_suffix, dn), record)
    return records
+
def _set_key(record, key, value, override=True):
    """Case-agnostic value setter.

    If ``record`` already has a key that equals ``key`` when case is
    ignored, that existing key is used.  Otherwise the value is stored
    under the lower-cased ``key``.

    :param record: dictionary to update in place.
    :param key: attribute name, matched case-insensitively.
    :param value: value to store.
    :param override: when false, an existing value is left untouched.

    >>> record = {'aB': 'old'}
    >>> _set_key(record, 'AB', 'new')
    >>> record
    {'aB': 'new'}
    >>> _set_key(record, 'Ab', 'ignored', override=False)
    >>> _set_key(record, 'givenName', ['John'], override=False)
    >>> sorted(record.items())
    [('aB', 'new'), ('givenname', ['John'])]
    """
    wanted = key.lower()
    # First existing key that matches ignoring case, else the
    # lower-cased name itself.
    target = next((k for k in record if k.lower() == wanted), wanted)
    if override or target not in record:
        record[target] = value
+
def add_names(records):
    """Derive ``sn`` and ``givenName`` from the first ``cn`` value.

    The common name is split at its last space: the part after it
    becomes the surname and the part before it the given name.  A
    single-word name is used as the surname alone, with no given name.
    Surrounding whitespace is ignored and empty values are never
    stored.  Values already present in a record are not overridden.

    Surname and givenName are defined in `RFC 4519`_.

    .. _RFC 4519: http://tools.ietf.org/html/rfc4519

    :param records: iterable of ``(dn, record)`` pairs; the record
        dictionaries are updated in place.
    :returns: ``records``.
    """
    for dn, record in records:
        full_name = record['cn'][0].strip()
        if not full_name:
            # nothing to derive a name from
            continue
        # rpartition yields ('', '', name) when there is no space, so a
        # one-word name does not raise the way tuple-unpacking rsplit does.
        given, _, surname = full_name.rpartition(' ')
        _set_key(record, 'sn', [surname], override=False)
        if given:
            _set_key(record, 'givenName', [given], override=False)
    return records
+
def standardize_phone_numbers(records):
    """Placeholder for phone number standardization.

    The intent is to rewrite phone numbers in `E.123`_ international
    notation, assuming that numbers not starting with a '+' live in
    the USA.

    TODO: not implemented yet, ``records`` is returned unchanged:

    >>> records = [('cn=John', {'homephone': ['123-456-7890']})]
    >>> standardize_phone_numbers(records)
    [('cn=John', {'homephone': ['123-456-7890']})]

    .. _E.123: http://en.wikipedia.org/wiki/E.123
    """
    # TODO
    return records
+
def standardize_country_code(records):
    """Swap known country names in ``countryname`` for two-letter codes.

    Names missing from the lookup table are kept as they are.  The
    lookup is case-sensitive.  Records are updated in place and
    ``records`` is returned.
    """
    # TODO
    # ISO3166
    # http://tools.ietf.org/html/rfc4519
    # http://tools.ietf.org/html/rfc4517
    codes = dict(
        USA='US',
        Canada='CA',
        Bermuda='BM',
        Bahamas='BS',
        Netherlands='NL',
    )
    for _, attrs in records:
        if 'countryname' not in attrs:
            continue
        attrs['countryname'] = [
            codes.get(name, name) for name in attrs['countryname']]
    return records
+
def rename_locality(records):
    """Move each record's ``locality`` values to ``localityname``.

    Records without a ``locality`` key are left alone.
    """
    # locality -> l (localityName)
    for _, attrs in records:
        try:
            place = attrs.pop('locality')
        except KeyError:
            continue
        attrs['localityname'] = place
    return records
+
def rename_cellphone(records):
    """Move each record's ``cellphone`` values to ``mobile``.

    Records without a ``cellphone`` key are left alone.
    """
    # cellphone -> mobile
    absent = object()
    for _, attrs in records:
        numbers = attrs.pop('cellphone', absent)
        if numbers is not absent:
            attrs['mobile'] = numbers
    return records
+
def rename_xmozillaanyphone(records):
    """Move each record's ``xmozillaanyphone`` values to
    ``telephonenumber``.

    Records without an ``xmozillaanyphone`` key are left alone.
    """
    # xmozillaanyphone -> telephonenumber
    old_key, new_key = 'xmozillaanyphone', 'telephonenumber'
    for _, attrs in records:
        if old_key in attrs:
            attrs[new_key] = attrs.pop(old_key)
    return records
+
def rename_xmozillanickname(records):
    """Move each record's ``xmozillanickname`` values to ``displayname``.

    Records without an ``xmozillanickname`` key are left alone.
    """
    # xmozillanickname -> displayname
    for _, attrs in records:
        if 'xmozillanickname' not in attrs:
            continue
        nicknames = attrs.pop('xmozillanickname')
        attrs['displayname'] = nicknames
    return records
+
def rename_homeurl(records):
    """Turn each record's ``homeurl`` values into ``labeleduri`` values.

    Every URL gets the label `` Home Page`` appended.  Records without
    a ``homeurl`` key are left alone.
    """
    # homeurl -> labeledURI
    for _, attrs in records:
        if 'homeurl' not in attrs:
            continue
        labeled = []
        for url in attrs.pop('homeurl'):
            labeled.append('%s Home Page' % url)
        attrs['labeleduri'] = labeled
    return records
+
def set_postaladdress(records):
    r"""Build ``homepostaladdress`` from the separate address attributes.

    The first values of ``streetaddress``, ``streetaddress2``,
    "``localityname``, ``st``", ``postalcode`` and ``countryname`` are
    joined with ``$`` in that order; missing or empty parts are
    skipped.  ``\`` and ``$`` inside a part are escaped as ``\5C`` and
    ``\24``, following the Postal Address syntax of RFC 4517.

    ``streetaddress2`` is removed from the record.  An existing
    ``homepostaladdress`` is kept, and a record with no address parts
    gets no ``homepostaladdress`` at all rather than an empty value.

    :param records: iterable of ``(dn, record)`` pairs; the record
        dictionaries are updated in place.
    :returns: ``records``.
    """
    # postalAddress defined in rfc4517
    # homePostalAddress defined in rfc4524
    # streetAddress defined in rfc4519
    for dn, record in records:
        street = record.get('streetaddress', [None])[0]
        addr2 = record.get('streetaddress2', [None])[0]
        locality = record.get('localityname', [None])[0]
        state = record.get('st', [None])[0]
        post = record.get('postalcode', [None])[0]
        country = record.get('countryname', [None])[0]
        # streetaddress2 is folded into the combined address only
        record.pop('streetaddress2', None)

        # "locality, state", or whichever of the two is present
        locality_state = ', '.join(
            part for part in (locality, state) if part)

        lines = [
            # escape the backslash first so that the backslash added
            # for '$' is not escaped again
            line.replace('\\', '\\5C').replace('$', '\\24')
            for line in (street, addr2, locality_state, post, country)
            if line]
        if lines:
            _set_key(
                record, 'homepostaladdress', ['$'.join(lines)],
                override=False)
    return records
+
+
if __name__ == '__main__':
    import argparse
    import sys

    # Command-line front end: LDIF is read from stdin and the
    # cleaned-up LDIF is written to stdout.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '-b', '--basedn',
        dest='basedn',
        metavar='DNBASE',
        default='ou=people,dc=example,dc=org',
        help="Base distinguished name for the entries (%(default)s)",
    )
    options = parser.parse_args()

    raw_ldif = sys.stdin.read()
    sys.stdout.write(cleanup(raw_ldif, basedn=options.basedn))