# File: tools/pycert_bearssl/cert_util.py (new file)
#
# Utility functions for a Python SSL certificate conversion tool.
# These functions are in this file so they can also be used in
# a web API implementing this tool.
# Author: Tony DiCola, Modified by Noah Koontz
#
# Dependencies:
#   click - Install with 'sudo pip install click' (omit sudo on windows)
#   PyOpenSSL - See homepage: https://pyopenssl.readthedocs.org/en/latest/
#     Should just be a 'sudo pip install pyopenssl' command, HOWEVER
#     on Windows you probably need a precompiled binary version. Try
#     installing with pip and if you see errors when running that
#     OpenSSL can't be found then try installing egenix's prebuilt
#     PyOpenSSL library and OpenSSL lib:
#       http://www.egenix.com/products/python/pyOpenSSL/
#   certifi - Install with 'sudo pip install certifi' (omit sudo on windows)

import re
from OpenSSL import SSL, crypto
import socket
import textwrap
import math
import os

# Matches a single PEM certificate block, from its BEGIN line through its END
# line. Written as a raw string so the regex contains no invalid Python escape
# sequences, and tolerant of both LF and CRLF line endings. The body accepts
# only base64 characters and line breaks, followed by up to two '=' padding
# characters.
CERT_PATTERN = re.compile(
    r"^-----BEGIN CERTIFICATE-----\r?\n"
    r"[A-Za-z0-9+/\r\n]+={0,2}\r?\n"
    r"-----END CERTIFICATE-----",
    re.MULTILINE)

# Default name prefixes for variables used in the header autogeneration.
# The autogenerator will follow these names with a number,
# e.g. "TA_DN0"
# Distinguished name array prefix
DN_PRE = "TA_DN"
# RSA public key number prefix
RSA_N_PRE = "TA_RSA_N"
# RSA public key exponent prefix
RSA_E_PRE = "TA_RSA_E"


# Template that defines the C header output format.
# This takes in a few named parameters:
#   - guard_name: Unique name to apply to the #ifndef header guard.
#   - cert_length_var: Variable/define name for the length of the certificate.
#   - cert_length: Value written after the cert_length_var define.
#   - cert_data: Certificate data, formatted as a bearssl trust anchor array
#     (this text already contains the certificate array variable itself).
#   - cert_description: Any descriptive info about the certs to put in comments.
# NOTE: If you're changing the template make sure to escape all curly braces
# with a double brace (like {{ or }}) or else Python will try to interpret as a
# string format variable.
CFILE_TEMPLATE = """\
#ifndef _{guard_name}_H_
#define _{guard_name}_H_

#ifdef __cplusplus
extern "C"
{{
#endif

/* This file is auto-generated by the pycert_bearssl tool. Do not change it manually.
 * Certificates are BearSSL br_x509_trust_anchor format. Included certs:
 *
{cert_description}
 */

#define {cert_length_var} {cert_length}

{cert_data}

#ifdef __cplusplus
}} /* extern "C" */
#endif

#endif /* ifndef _{guard_name}_H_ */
"""

# Template that defines a static array of bytes
# This takes in a few named parameters:
#   - ray_type: The type (int, unsigned char) to use for the static array
#   - ray_name: The variable name of the static array
#   - ray_data: The comma separated data of the array (ex. "0x12, 0x34, ...")
CRAY_TEMPLATE = """\
static const {ray_type} {ray_name}[] = {{
{ray_data}
}};"""

# Template that defines a single root certificate entry in the BearSSL trust
# anchor list
# This takes in a few named parameters:
#   - ta_dn_name: The name of the static byte array containing the distinguished
#     name of the certificate.
#   - rsa_number_name: Variable name of the static array containing the RSA number
#   - rsa_exp_name: Variable name of the static array containing the RSA exponent
CROOTCA_TEMPLATE = """\
    {{
        {{ (unsigned char *){ta_dn_name}, sizeof {ta_dn_name} }},
        BR_X509_TA_CA,
        {{
            BR_KEYTYPE_RSA,
            {{ .rsa = {{
                (unsigned char *){rsa_number_name}, sizeof {rsa_number_name},
                (unsigned char *){rsa_exp_name}, sizeof {rsa_exp_name},
            }} }}
        }}
    }},"""

# Template that defines a description of the certificate, so that the header
# file can be slightly more human readable
# This takes in a few named parameters:
#   - cert_num: The index used to represent the certificate to the computer
#   - cert_label: The certificate's name field (usually CN, in the subject)
#   - cert_subject: The certificate's subject string
CCERT_DESC_TEMPLATE = """\
 * Index:    {cert_num}
 * Label:    {cert_label}
 * Subject:  {cert_subject}"""


def PEM_split(cert_pem):
    """Split a PEM certificate / certificate chain into individual PEM certs.

    cert_pem is a string holding one or more PEM certificates. Returns a list
    of strings, each one a single certificate including its
    '-----BEGIN CERTIFICATE-----' / '-----END CERTIFICATE-----' delimiters,
    in the order they appear in the input. Returns an empty list when no
    certificate is found.
    """
    # Normalise CRLF line endings so certificates saved on Windows are split
    # the same way as LF-only input.
    return CERT_PATTERN.findall(cert_pem.replace("\r\n", "\n"))


def parse_root_certificate_store(store):
    """Load every certificate found in a trusted root store.

    store is an open, readable text file object containing concatenated PEM
    certificates (for example the file at certifi.where()). Returns a list of
    PyOpenSSL X509 objects, one for each certificate PEM_split finds.
    """
    return [crypto.load_certificate(crypto.FILETYPE_PEM, pem)
            for pem in PEM_split(store.read())]


def get_server_root_cert(address, port, certDict):
    """Look up the stored root certificate that anchors a server's chain.

    Connects to (address, port), performs a TLS handshake, and takes the last
    certificate of the chain the server presents. The issuer name hash of
    that certificate (X509Name.hash()) is then looked up in certDict, which
    must map certificate.get_subject().hash() values to PyOpenSSL X509
    objects.

    Returns the matching X509 object from certDict, or None (after printing a
    message) if the connection or handshake failed, the server presented no
    chain, or certDict has no matching entry.

    NOTE: no verification mode is configured on the SSL context here, so the
    chain is matched by issuer name only.
    """
    # Python's built in ssl library can't expose the full peer chain, so the
    # OpenSSL-based library is used instead.
    ctx = SSL.Context(SSL.TLSv1_2_METHOD)
    soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    ssl_soc = SSL.Connection(ctx, soc)
    chain = None
    try:
        # Send the host name (SNI) so servers hosting several sites present
        # the chain that belongs to the requested domain.
        ssl_soc.set_tlsext_host_name(address.encode("idna"))
        ssl_soc.connect((address, port))
        ssl_soc.do_handshake()
        chain = ssl_soc.get_peer_cert_chain()
    except (SSL.Error, OSError, UnicodeError) as err:
        print("Failed to connect to domain: " + address + " (" + repr(err) + ")")
    finally:
        # shutdown() raises when the handshake never completed; that must not
        # hide the outcome above or stop the socket from being closed.
        try:
            ssl_soc.shutdown()
        except (SSL.Error, OSError):
            pass
        soc.close()
    # get_peer_cert_chain() may return None or an empty list
    if not chain:
        print("Failed to fetch certificate on domain: " + address)
        return None
    # match the last certificate in the chain to a stored root by issuer hash
    root = certDict.get(chain[-1].get_issuer().hash())
    if root is None:
        print("Could not find matching root certificate for domain: " + address)
    return root


def filter_duplicate_x509(certs):
    """Return certs with repeated certificates removed, keeping first-seen order.

    Certificates are compared by their SHA-256 fingerprint. Serial numbers on
    their own are not a safe key: they are only unique per issuer, so two
    different root certificates can carry the same serial number.
    """
    seen = set()
    unique = []
    for cert in certs:
        fingerprint = cert.digest("sha256")
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        unique.append(cert)
    return unique


def bytes_to_c_data(mah_bytes, length=None, indent=' '):
    """Format bytes as the body of a C array initializer, with line breaks.

    e.g. b'\\x12\\xa4' becomes "0x12, 0xa4," (prefixed by indent).

    mah_bytes is the bytes/bytearray to format. If length is given and is
    larger than len(mah_bytes), the output is padded with 0x00 entries up to
    length entries; longer input is never truncated. indent is the string
    placed at the start of every output line. Lines hold at most 12 entries.
    Returns an empty string for empty input.
    """
    entries = ["0x{:02x}, ".format(byte) for byte in bytearray(mah_bytes)]
    # pad with extra zeros
    if length is not None and len(entries) < length:
        entries.extend(["0x00, "] * (length - len(entries)))
    # every entry is 6 characters wide, except the last one on a line whose
    # trailing space is dropped by textwrap
    per_line = 12
    return textwrap.fill(''.join(entries),
                         width=len(indent) + 6 * per_line - 1,
                         initial_indent=indent,
                         subsequent_indent=indent,
                         break_long_words=False)


def _comment_safe(text):
    """Make text safe to embed in a one-line entry of a C block comment."""
    return text.replace("*/", "* /").replace("\r", " ").replace("\n", " ")


def decribe_cert_object(cert, cert_num):
    """Build the human readable comment entry for one certificate.

    cert is a PyOpenSSL X509 object and cert_num the index to print for it.
    The label is the subject's CN if present, otherwise its OU, otherwise its
    O, otherwise an empty string. Returns CCERT_DESC_TEMPLATE filled in with
    the index, the label and the RFC 4514 subject string.
    """
    components = dict(cert.get_subject().get_components())
    label = ""
    for key in (b'CN', b'OU', b'O'):
        if key in components:
            # tolerate names that are not valid UTF-8 instead of crashing
            label = components[key].decode("utf-8", errors="replace")
            break
    subject = cert.to_cryptography().subject.rfc4514_string()
    return CCERT_DESC_TEMPLATE.format(
        cert_num=cert_num,
        cert_label=_comment_safe(label),
        cert_subject=_comment_safe(subject),
    )


# Correctly spelled alias; the original name is kept for existing callers.
describe_cert_object = decribe_cert_object


def c_header_guard(file_name):
    """Derive a C identifier for an include guard from an output file name.

    Directory components and the extension are dropped, every character that
    is not an ASCII letter, digit or underscore becomes '_', and the result
    is upper-cased (e.g. "out/my-certs.h" gives "MY_CERTS"). Falls back to
    "CERTIFICATES" when nothing is left.
    """
    stem = os.path.splitext(os.path.basename(str(file_name)))[0]
    return re.sub(r"[^0-9A-Za-z_]", "_", stem).upper() or "CERTIFICATES"


def _uint_to_bytes(value):
    """Encode a non-negative int as big-endian bytes of minimal length."""
    return value.to_bytes(max(1, (value.bit_length() + 7) // 8), byteorder="big")


def x509_to_header(x509Certs, cert_var, cert_length_var, output_file, keep_dupes):
    """Write a collection of certificates as a C header of BearSSL trust anchors.

    x509Certs is a list of PyOpenSSL X509 objects. cert_var is the name of the
    br_x509_trust_anchor array in the output header and cert_length_var the
    name of the define holding the number of certificates. output_file is a
    file object open for writing; its name (when it has one) is used to build
    the include guard. keep_dupes indicates whether duplicate certificates
    are left intact (True) or removed (False).

    Raises ValueError if a certificate's public key is not an RSA key, since
    the trust anchor template only describes RSA keys.
    """
    certs = list(x509Certs) if keep_dupes else filter_duplicate_x509(x509Certs)
    static_arrays = []
    trust_anchors = []
    descriptions = []
    for i, cert in enumerate(certs):
        # add a description of the certificate to the header comment
        descriptions.append(decribe_cert_object(cert, i))
        numbers = cert.get_pubkey().to_cryptography_key().public_numbers()
        if not (hasattr(numbers, "n") and hasattr(numbers, "e")):
            raise ValueError(
                "Certificate {0} does not use an RSA public key; only RSA "
                "trust anchors can be generated".format(i))
        dn_name = DN_PRE + str(i)
        n_name = RSA_N_PRE + str(i)
        e_name = RSA_E_PRE + str(i)
        # one static byte array each for the DER encoded distinguished name,
        # the RSA modulus and the RSA public exponent
        for name, data in ((dn_name, cert.get_subject().der()),
                           (n_name, _uint_to_bytes(numbers.n)),
                           (e_name, _uint_to_bytes(numbers.e))):
            static_arrays.append(CRAY_TEMPLATE.format(
                ray_type="unsigned char",
                ray_name=name,
                ray_data=bytes_to_c_data(data)))
        # the trust anchor entry referencing those arrays
        trust_anchors.append(CROOTCA_TEMPLATE.format(
            ta_dn_name=dn_name,
            rsa_number_name=n_name,
            rsa_exp_name=e_name))
    # concatenate it all into the big header file template
    cert_data_out = '\n\n'.join(static_arrays + [CRAY_TEMPLATE.format(
        ray_type="br_x509_trust_anchor",
        ray_name=cert_var,
        ray_data='\n'.join(trust_anchors))])
    # in-memory streams have no name attribute
    file_name = getattr(output_file, "name", "certificates.h")
    output_file.write(CFILE_TEMPLATE.format(
        guard_name=c_header_guard(file_name),
        cert_description='\n * \n'.join(descriptions),
        cert_length_var=cert_length_var,
        cert_length=str(len(certs)),
        cert_data=cert_data_out,
    ))


# File: tools/pycert_bearssl/pycert_bearssl.py (new file)
#
# Python SSL certificate conversion tool.
# Downloads and converts SSL certs from PEM format into a C header that can be
# referenced from a sketch to load the certificate data (in binary DER format).
# Modified by the OPEnS lab to output certificate data in a format supported by
# BearSSL.
# Author: Tony DiCola, Modified by Noah Koontz
#
# Dependencies:
#   click - Install with 'sudo pip install click' (omit sudo on windows)
#   PyOpenSSL - See homepage: https://pyopenssl.readthedocs.org/en/latest/
#     Should just be a 'sudo pip install pyopenssl' command, HOWEVER
#     on Windows you probably need a precompiled binary version.
#     Try installing with pip and if you see errors when running that
#     OpenSSL can't be found then try installing egenix's prebuilt
#     PyOpenSSL library and OpenSSL lib:
#       http://www.egenix.com/products/python/pyOpenSSL/
#
import cert_util
import click
import certifi

# Default name for the cert length variable
CERT_LENGTH_NAME = "TAs_NUM"
# Default name for the cert array variable
CERT_ARRAY_NAME = "TAs"


# Click setup and commands:
@click.group()
def pycert_bearssl():
    """OPEnS Python Certificate Tool

    This is a tool to download and convert SSL certificates and certificate
    chains into a C header format that can be imported into BearSSL
    """
    pass


@pycert_bearssl.command(short_help='Download SSL certs and save as a C header.')
@click.option('--port', '-p', type=click.INT, default=443,
              help='port to use for reading certificate (default 443, SSL)')
@click.option('--cert-var', '-c', default=CERT_ARRAY_NAME,
              help='name of the variable in the header which will contain certificate data (default: {0})'.format(CERT_ARRAY_NAME))
@click.option('--cert-length-var', '-l', default=CERT_LENGTH_NAME,
              help='name of the define in the header which will contain the length of the certificate data (default: {0})'.format(CERT_LENGTH_NAME))
@click.option('--output', '-o', type=click.File('w'), default='certificates.h',
              help='name of the output file (default: certificates.h)')
@click.option('--use-store', '-s', type=click.File('r'), default=certifi.where(),
              help='the location of the .pem file containing a list of trusted root certificates (default: use certifi.where())')
@click.option('--keep-dupes', '-d', is_flag=True, default=False,
              help='write all certs including any duplicates across domains (default: remove duplicates)')
@click.argument('domain', nargs=-1, required=True)
def download(port, cert_var, cert_length_var, output, use_store, keep_dupes, domain):
    """Download the SSL certificates for specified domain(s) and save them as a C
    header file that can be imported into a sketch.

    Provide at least one argument that is the domain to query for its SSL
    certificate, for example google.com for Google's SSL certificate. You can
    provide any number of domains as additional arguments. All of the certificates
    will be combined into a single output header.

    By default the file 'certificates.h' will be created, however you can change
    the name of the file with the --output option.

    For every domain, the certificate saved is the trusted root certificate
    from the root store (see --use-store) that issued the last certificate in
    the chain sent by the server.

    Example of downloading google.com's SSL certificate and storing it in
    certificates.h:

      pycert download google.com

    Example of downloading google.com and adafruit.com's SSL certificates and
    storing them in data.h:

      pycert download --output data.h google.com adafruit.com

    Note that the server's chain is only matched to a root certificate by
    issuer name; the connection itself is not validated.
    """
    # prepare the root certificate store, keyed by subject name hash so a
    # server certificate's issuer hash can be looked up directly
    cert_obj_store = cert_util.parse_root_certificate_store(use_store)
    cert_dict = {cert.get_subject().hash(): cert for cert in cert_obj_store}
    # Find the root cert object for each provided domain.
    down_certs = []
    for d in domain:
        cert = cert_util.get_server_root_cert(d, port, cert_dict)
        if cert is None:
            raise click.ClickException('Could not download and/or validate the certificate for {0} port {1}!'.format(d, port))
        click.echo('Retrieved certificate for {0}'.format(d))
        # append cert to array
        down_certs.append(cert)
    # Combine certs and write output header.
    cert_util.x509_to_header(down_certs, cert_var, cert_length_var, output, keep_dupes)


@pycert_bearssl.command(short_help='Convert PEM certs into a C header.')
@click.option('--cert-var', '-c', default=CERT_ARRAY_NAME,
              help='name of the variable in the header which will contain certificate data (default: {0})'.format(CERT_ARRAY_NAME))
@click.option('--cert-length-var', '-l', default=CERT_LENGTH_NAME,
              help='name of the define in the header which will contain the length of the certificate data (default: {0})'.format(CERT_LENGTH_NAME))
@click.option('--output', '-o', type=click.File('w'), default='certificates.h',
              help='name of the output file (default: certificates.h)')
@click.option('--full-chain', '-f', is_flag=True, default=False,
              help='use the full certificate chain and not just the root/last cert (default: false, root cert only)')
@click.option('--keep-dupes', '-d', is_flag=True, default=False,
              help='write all certs including any duplicates (default: remove duplicates)')
@click.argument('cert', type=click.File('r'), nargs=-1, required=True)
def convert(cert_var, cert_length_var, output, full_chain, keep_dupes, cert):
    """Convert PEM certificates into a C header that can be imported into a
    sketch. Specify each certificate to encode as a separate argument (each
    must be in PEM format) and they will be merged into a single file.

    By default the file 'certificates.h' will be created, however you can change
    the name of the file with the --output option.

    If a chain of certificates is found then only the root certificate (i.e.
    the last in the chain) will be saved. However you can override this and
    force the full chain to be saved with the --full-chain option.

    Example of converting a foo.pem certificate into a certificates.h header:

      pycert convert foo.pem

    Example of converting foo.pem and bar.pem certificates into data.h:

      pycert convert foo.pem bar.pem
    """
    # PyOpenSSL is already required by cert_util; imported here because this
    # is the only command in this module that parses PEM data itself.
    from OpenSSL import crypto

    x509_certs = []
    for c in cert:
        # a file may hold a whole chain, so split it into individual certs
        pems = cert_util.PEM_split(c.read())
        if not pems:
            raise click.ClickException('No PEM certificate found in {0}'.format(c.name))
        click.echo('Loaded certificate {0}'.format(c.name))
        if not full_chain:
            # root cert only: the last certificate of the chain
            pems = pems[-1:]
        x509_certs.extend(
            crypto.load_certificate(crypto.FILETYPE_PEM, pem.encode("ascii"))
            for pem in pems)
    # Combine certs and write output header.
    cert_util.x509_to_header(x509_certs, cert_var, cert_length_var, output, keep_dupes)


if __name__ == '__main__':
    pycert_bearssl()