Add LDAP post.
[blog.git] / posts / LDAP / abook-ldif-cleanup.py
diff --git a/posts/LDAP/abook-ldif-cleanup.py b/posts/LDAP/abook-ldif-cleanup.py
new file mode 100755 (executable)
index 0000000..5fd04f6
--- /dev/null
@@ -0,0 +1,252 @@
+#!/usr/bin/env python
+#
+# Copy...
+
+"""Cleanup the LDIF output from abook_ using `python-ldap`_.
+
+.. _abook: http://abook.sourceforge.net/
+.. _python-ldap: http://www.python-ldap.org/
+"""
+
+import re
+import StringIO
+
+import ldif
+
+
+def cleanup(text, basedn):
+    # pre-parser formatting
+    text = remove_trailing_mail(text)
+    text = remove_cn_commas(text)
+
+    records = ldif.ParseLDIF(StringIO.StringIO(text))
+
+    # post-parser formatting
+    records = remove_empty_mail(records)
+    records = remove_top_objectclass(records)
+    records = add_inetorgperson_objectclass(records)
+    records = add_base_dn(records, basedn)
+    records = add_names(records)
+    records = standardize_phone_numbers(records)
+    records = standardize_country_code(records)
+    records = rename_locality(records)
+    records = rename_cellphone(records)
+    records = rename_xmozillaanyphone(records)
+    records = rename_xmozillanickname(records)
+    records = rename_homeurl(records)
+    records = set_postaladdress(records)
+
+    # convert back to a string
+    s = StringIO.StringIO()
+    writer = ldif.LDIFWriter(s)
+    for dn,record in records:
+        writer.unparse(dn, record)
+    return 'version: 1\n\n%s' % s.getvalue()
+
+def remove_trailing_mail(text):
+    """
+    >>> print(remove_trailing_mail('\\n'.join([
+    ...     'version: 1',
+    ...     'dn: cn=John Doe,mail=',
+    ...     'cn: John Doe',
+    ...     '',
+    ...     ])))
+    version: 1
+    dn: cn=John Doe,mail=x@y.com
+    cn: John Doe
+    <BLANKLINE>
+    """
+    return re.sub(',mail=$', ',mail=x@y.com', text, flags=re.MULTILINE)
+
+def _sub_cn_commas(match):
+    cn = match.group(1).replace(',', '_')
+    return 'cn=%s,mail=' % cn
+
+def remove_cn_commas(text):
+    """
+    >>> print(remove_cn_commas('\\n'.join([
+    ...     'version: 1',
+    ...     'dn: cn=John, Jane, and Jim Doe,mail=x@y.com',
+    ...     'cn: John, Jane, and Jim Doe',
+    ...     '',
+    ...     ])))
+    version: 1
+    dn: cn=John_ Jane_ and Jim Doe,mail=x@y.com
+    cn: John, Jane, and Jim Doe
+    <BLANKLINE>
+    """
+    return re.sub('cn=(.*),mail=', _sub_cn_commas, text)
+
+def remove_empty_mail(records):
+    for dn,record in records:
+        if 'mail' in record and record['mail'] == ['']:
+            record.pop('mail')
+    return records
+
+def remove_top_objectclass(records):
+    for dn,record in records:
+        if 'top' in record['objectclass']:
+            record['objectclass'].remove('top')
+    return records
+
+def add_inetorgperson_objectclass(records):
+    for dn,record in records:
+        record['objectclass'].extend(
+            ['organizationalPerson', 'inetOrgPerson', 'extensibleObject'])
+        # extensibleObject required for countryName
+    return records
+
+def add_base_dn(records, basedn):
+    regexp = re.compile(',mail=.*')
+    subst = ', ' + basedn
+    for i,(dn,record) in enumerate(records):
+        new_dn = regexp.sub(subst, dn)
+        records[i] = (new_dn, record)
+    return records
+
+def _set_key(record, key, value, override=True):
+    """Case-agnostic value setter.
+
+    >>> record = {'aB': 'old'}
+    >>> _set_key(record, 'AB', 'new')
+    >>> print record
+    """
+    key = key.lower()
+    keys = [k for k in record.keys() if k.lower() == key.lower()]
+    if keys:
+        k = keys[0]
+    else:
+        k = key
+    if override or k not in record:
+        record[k] = value
+
+def add_names(records):
+    """
+    Surname and givenName are defined in `RFC 4519`_.
+
+    .. _RFC 4512: http://tools.ietf.org/html/rfc4519
+    """
+    for dn,record in records:
+        cn = record['cn']
+        gn,sn = cn[0].rsplit(' ', 1)
+        _set_key(record, 'sn', [sn], override=False)
+        _set_key(record, 'givenName', [gn], override=False)
+    return records
+
+def standardize_phone_numbers(records):
+    """Standardize phone numbers to match `E.123`_ international notation.
+
+    Assumes numbers not starting with a '+' live in the USA.
+
+    Not implemented yet: `records` is currently returned unchanged.
+
+    >>> records = [('cn=John', {'homephone': ['123-456-7890']})]
+    >>> standardize_phone_numbers(records)
+    [('cn=John', {'homephone': ['123-456-7890']})]
+
+    .. _E.123: http://en.wikipedia.org/wiki/E.123
+    """
+    # TODO: implement the conversion described in the docstring
+    return records
+
+def standardize_country_code(records):
+    # TODO
+    # ISO3166
+    # http://tools.ietf.org/html/rfc4519
+    # http://tools.ietf.org/html/rfc4517
+    #USA      US
+    #Canada   CA
+    #Bermuda  BM
+    #Bahamas  BS
+    #Netherlands NL
+    table = {
+        'USA': 'US',
+        'Canada': 'CA',
+        'Bermuda': 'BM',
+        'Bahamas': 'BS',
+        'Netherlands': 'NL',
+        }
+    for dn,record in records:
+        if 'countryname' in record:
+            record['countryname'] = [
+                table.get(c, c) for c in record['countryname']]
+    return records
+
+def rename_locality(records):
+    # locality -> l (localityName)
+    for dn,record in records:
+        if 'locality' in record:
+            record['localityname'] = record.pop('locality')
+    return records
+
+def rename_cellphone(records):
+    # cellphone -> mobile
+    for dn,record in records:
+        if 'cellphone' in record:
+            record['mobile'] = record.pop('cellphone')
+    return records
+
+def rename_xmozillaanyphone(records):
+    # xmozillaanyphone -> telephonenumber
+    for dn,record in records:
+        if 'xmozillaanyphone' in record:
+            record['telephonenumber'] = record.pop('xmozillaanyphone')
+    return records
+
+def rename_xmozillanickname(records):
+    # xmozillanickname -> displayname
+    for dn,record in records:
+        if 'xmozillanickname' in record:
+            record['displayname'] = record.pop('xmozillanickname')
+    return records
+
+def rename_homeurl(records):
+    # homeurl -> labeledURI
+    for dn,record in records:
+        if 'homeurl' in record:
+            record['labeleduri'] = [
+                '%s Home Page' % x for x in record.pop('homeurl')]
+    return records
+
+def set_postaladdress(records):
+    # postalAddress defined in rfc4517
+    # homePostalAddress defined in ?
+    # streetAddress defined in rfc4519
+    for dn,record in records:
+        street = record.get('streetaddress', [None])[0]
+        addr2 = record.get('streetaddress2', [None])[0]
+        locality = record.get('localityname', [None])[0]
+        state = record.get('st', [None])[0]
+        if locality:
+            ls = locality
+            if state:
+                ls += ', %s' % state
+        elif state:
+            ls = state
+        else:
+            ls = None
+        post = record.get('postalcode', [None])[0]
+        country = record.get('countryname', [None])[0]
+        if 'streetaddress2' in record:
+            record.pop('streetaddress2')
+        addr = '$'.join(
+            [line for line in [street, addr2, ls, post, country] if line])
+        _set_key(record, 'homepostaladdress', [addr], override=False)
+    return records
+
+
+if __name__ == '__main__':
+    import argparse
+    import sys
+
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument(
+        '-b', '--basedn', dest='basedn', metavar='DNBASE',
+        default='ou=people,dc=example,dc=org',
+        help="Base distinguished name for the entries (%(default)s)")
+
+    args = p.parse_args()
+
+    text = sys.stdin.read()
+    text = cleanup(text, basedn=args.basedn)
+    sys.stdout.write(text)