Wrote a small Python utility based on pycert to autogenerate BearSSL trust anchor header files; still need to finish commenting it and create a web-hosted version

This commit is contained in:
Noah Laptop 2019-03-13 16:13:20 -07:00
parent ef4a55cbe8
commit ac951f1b30
2 changed files with 411 additions and 0 deletions

View file

@ -0,0 +1,286 @@
# Utility functions for a Python SSL certificate conversion tool.
# These functions are in this file so they can also be used in
# a web API implementing this tool.
# Author: Tony DiCola, Modified by Noah Koontz
#
# Dependencies:
# click - Install with 'sudo pip install click' (omit sudo on windows)
# PyOpenSSL - See homepage: https://pyopenssl.readthedocs.org/en/latest/
# Should just be a 'sudo pip install pyopenssl' command, HOWEVER
# on Windows you probably need a precompiled binary version. Try
# installing with pip and if you see errors when running that
# OpenSSL can't be found then try installing egenix's prebuilt
# PyOpenSSL library and OpenSSL lib:
# http://www.egenix.com/products/python/pyOpenSSL/
# certifi - Install with 'sudo pip install certifi' (omit sudo on windows)
import re
from OpenSSL import SSL, crypto
import socket
import textwrap
import math
import os
# Matches one PEM-encoded certificate, from its BEGIN line through its END
# line. Written as a raw string so the regex is not run through Python's
# string-escape processing (the old "\-" and "\/" escapes were invalid string
# escapes). The body class lists only the base64 alphabet plus newlines; the
# commas that used to sit in the class were literal commas, not separators.
CERT_PATTERN = re.compile(
    r"^-----BEGIN CERTIFICATE-----[a-zA-Z0-9\n/+]+={0,2}\n-----END CERTIFICATE-----",
    re.MULTILINE)
# Default name prefixes for variables used in the header autogeneration.
# The autogenerator appends the certificate's index to each prefix,
# e.g. "TA_DN0".
# Distinguished name array prefix
DN_PRE = "TA_DN"
# RSA public key modulus (n) array prefix
RSA_N_PRE = "TA_RSA_N"
# RSA public key exponent (e) array prefix
RSA_E_PRE = "TA_RSA_E"
# Template that defines the C header output format.
# This takes in a few named parameters:
# - guard_name: Unique name to apply to the #ifndef header guard.
# - cert_description: Any descriptive info about the certs to put in comments.
# - cert_length_var: Name of the #define holding the number of certificates.
# - cert_length: Value of that #define (the number of certificates).
# - cert_data: The generated C definitions: the static byte arrays followed by
#   the BearSSL trust anchor array.
# NOTE: If you're changing the template make sure to escape all curly braces
# with a double brace (like {{ or }}) or else Python will try to interpret as a
# string format variable.
CFILE_TEMPLATE = """\
#ifndef _{guard_name}_H_
#define _{guard_name}_H_
#ifdef __cplusplus
extern "C"
{{
#endif
/* This file is auto-generated by the pycert_bearssl tool. Do not change it manually.
* Certificates are BearSSL br_x509_trust_anchor format. Included certs:
*
{cert_description}
*/
#define {cert_length_var} {cert_length}
{cert_data}
#ifdef __cplusplus
}} /* extern "C" */
#endif
#endif /* ifndef _{guard_name}_H_ */
"""
# Template that defines a static const C array.
# This takes in a few named parameters:
# - ray_type: The element type (e.g. unsigned char) to use for the static array
# - ray_name: The variable name of the static array
# - ray_data: The comma separated data of the array (ex. "0x12, 0x34, ...")
CRAY_TEMPLATE = """\
static const {ray_type} {ray_name}[] = {{
{ray_data}
}};"""
# Template that defines a single root certificate entry in the BearSSL trust
# anchor list. The entry is always emitted as an RSA key (BR_KEYTYPE_RSA).
# This takes in a few named parameters:
# - ta_dn_name: The name of the static byte array containing the distinguished
#   name of the certificate.
# - rsa_number_name: Variable name of the static array containing the RSA
#   modulus
# - rsa_exp_name: Variable name of the static array containing the RSA exponent
CROOTCA_TEMPLATE = """\
{{
{{ (unsigned char *){ta_dn_name}, sizeof {ta_dn_name} }},
BR_X509_TA_CA,
{{
BR_KEYTYPE_RSA,
{{ .rsa = {{
(unsigned char *){rsa_number_name}, sizeof {rsa_number_name},
(unsigned char *){rsa_exp_name}, sizeof {rsa_exp_name},
}} }}
}}
}},"""
# Template that defines a description of the certificate, so that the header
# file can be slightly more human readable. The result is placed inside the
# header's leading block comment, hence the "* " line prefixes.
# This takes in a few named parameters:
# - cert_num: The index used to represent the certificate to the computer
# - cert_label: The certificate's name field (Usually CN, in the subject)
# - cert_subject: The certificate's subject string
CCERT_DESC_TEMPLATE = """\
* Index: {cert_num}
* Label: {cert_label}
* Subject: {cert_subject}"""
def PEM_split(cert_pem):
    """Break a PEM bundle into its individual certificates.

    cert_pem is text holding zero or more PEM certificates (for example a
    server chain or a root store). Returns a list of strings, one per
    certificate matched by CERT_PATTERN and in the order they appear, each
    still wrapped in its '-----BEGIN CERTIFICATE-----' /
    '-----END CERTIFICATE-----' lines. The list is empty when nothing matches.
    """
    # The compiled pattern does all the work; text between certificates
    # (labels, comments, blank lines) is simply not part of any match.
    pem_blocks = CERT_PATTERN.findall(cert_pem)
    return pem_blocks
def parse_root_certificate_store(store):
    """Load every certificate found in an open PEM file.

    store is a readable file object (for example the file at
    certifi.where() opened for reading). Its whole content is read and split
    into individual PEM certificates with PEM_split, and each one is parsed
    with crypto.load_certificate. Returns the resulting X509 objects as a
    list, in the order the certificates appear in the file.
    """
    x509_objects = []
    for pem_text in PEM_split(store.read()):
        x509_objects.append(crypto.load_certificate(crypto.FILETYPE_PEM, pem_text))
    return x509_objects
def get_server_root_cert(address, port, certDict):
    """Find the trusted root certificate that anchors a server's chain.

    Connects to address:port, performs a TLS handshake and takes the last
    certificate of the chain the server presents. The hash of that
    certificate's issuer name is then looked up in certDict.

    Parameters:
    address  - host name of the server (str).
    port     - TCP port to connect to.
    certDict - dictionary of { certificate.get_subject().hash() : certificate }
               built from the trusted root store.

    Returns the matching certificate from certDict (a PyOpenSSL X509 object),
    or None (after printing a message) when the server presented no chain or
    certDict holds no certificate for the issuer.

    Connection and handshake failures are not caught here: the socket / SSL
    exception propagates to the caller after the socket has been closed.
    """
    # Use PyOpenSSL to initiate an SSL connection and get the full cert chain.
    # Sadly Python's built in SSL library can't do this so we must use this
    # OpenSSL-based library.
    ctx = SSL.Context(SSL.TLSv1_2_METHOD)
    soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    chain = None
    try:
        ssl_soc = SSL.Connection(ctx, soc)
        # Send the host name (SNI) so servers hosting several sites present
        # the chain that belongs to the requested domain.
        ssl_soc.set_tlsext_host_name(address.encode("idna"))
        ssl_soc.connect((address, port))
        ssl_soc.do_handshake()
        chain = ssl_soc.get_peer_cert_chain()
        try:
            ssl_soc.shutdown()
        except SSL.Error:
            # The chain is already in hand; a failed close_notify must not
            # turn a successful fetch into an error.
            pass
    finally:
        # Always release the socket, including when connect/handshake raised.
        soc.close()
    # get_peer_cert_chain() may yield nothing when no chain was presented
    if not chain:
        print("Failed to fetch certificate on domain: " + address)
        return None
    # match the last certificate in the chain to its root certificate using
    # the hash of the issuer name
    cn_hash = chain[-1].get_issuer().hash()
    # if there is a respective certificate, return it
    # else print an error and return None
    if cn_hash not in certDict:
        print("Could not find matching root certificate for domain: " + address)
        return None
    return certDict[cn_hash]
def filter_duplicate_x509(certs):
    """Return certs with repeated certificates removed, keeping first-seen order.

    certs is an iterable of PyOpenSSL X509 objects. Two certificates are
    treated as the same one when both their issuer name (DER encoded) and
    their serial number match. The serial number alone is not enough: it is
    only unique per issuer, so unrelated root certificates can share a serial
    and would wrongly be dropped.

    Returns a new list; the input is not modified.
    """
    seen = set()
    unique = []
    for cert in certs:
        identity = (cert.get_issuer().der(), cert.get_serial_number())
        if identity in seen:
            # already emitted this certificate
            continue
        seen.add(identity)
        unique.append(cert)
    return unique
def bytes_to_c_data(mah_bytes, length=None):
    """Format bytes as the body of a C array initializer.

    Every byte becomes a lowercase "0xNN," entry, e.g: 0x12, 0xa4, ...
    The entries are wrapped to a 77 column width (twelve entries per line)
    and every line is indented with initial_indent / subsequent_indent.

    mah_bytes is the bytes / bytearray to convert. length, when given, is the
    minimum number of entries to emit: the output is padded with 0x00 entries
    until it has that many. Data longer than length is never truncated.

    Returns the formatted text, or an empty string for empty input with no
    padding requested.
    """
    entries = ["0x{:02x}, ".format(value) for value in mah_bytes]
    # pad with extra zeros
    if length is not None:
        while len(entries) < length:
            entries.append("0x00, ")
    # join, wrap, and return; break_long_words=False keeps each "0xNN," intact
    return textwrap.fill(''.join(entries), width=6*12 + 5, initial_indent=' ', subsequent_indent=' ', break_long_words=False)
def decribe_cert_object(cert, cert_num):
    """Build the human readable comment entry for one certificate.

    cert is a PyOpenSSL X509 object and cert_num the index it will have in
    the generated trust anchor array. The label is taken from the subject's
    CN field, falling back to OU and then O; it is left empty when the
    subject has none of them. Returns CCERT_DESC_TEMPLATE filled in with the
    index, the label and the subject rendered as an RFC 4514 string.
    """
    subject_parts = dict(cert.get_subject().get_components())
    # the first of these fields present in the subject supplies the label
    label = ""
    for field in (b'CN', b'OU', b'O'):
        if field in subject_parts:
            label = subject_parts[field].decode("utf-8")
            break
    # the cryptography view of the certificate provides the RFC 4514 rendering
    subject_text = cert.to_cryptography().subject.rfc4514_string()
    return CCERT_DESC_TEMPLATE.format(
        cert_num=cert_num,
        cert_label=label,
        cert_subject=subject_text,
    )
def _header_guard_name(output_file):
    """Derive an include-guard stem that is a valid C identifier from the output file's name."""
    # File-like objects without a name (e.g. in-memory buffers) get a default.
    file_name = os.path.basename(str(getattr(output_file, "name", "") or ""))
    stem = os.path.splitext(file_name)[0].upper()
    # Directory parts are already gone; replace anything else C would reject.
    stem = re.sub(r"[^0-9A-Z_]", "_", stem)
    return stem or "CERTIFICATES"


def _byte_array_definition(name, raw_bytes):
    """Render raw_bytes as a 'static const unsigned char <name>[]' definition."""
    return CRAY_TEMPLATE.format(
        ray_type="unsigned char",
        ray_name=name,
        ray_data=bytes_to_c_data(raw_bytes))


def x509_to_header(x509Certs, cert_var, cert_length_var, output_file, keep_dupes):
    """Write a C header holding the given certificates as BearSSL trust anchors.

    Parameters:
    x509Certs       - list of PyOpenSSL X509 objects.
    cert_var        - name of the br_x509_trust_anchor array in the header.
    cert_length_var - name of the #define holding the number of certificates.
    output_file     - file object open for writing. Its name (without
                      directories or extension, upper-cased, with characters
                      that are not valid in a C identifier replaced by '_')
                      becomes the include guard.
    keep_dupes      - True to keep duplicate certificates, False to remove
                      them with filter_duplicate_x509.

    For every certificate three byte arrays are emitted (subject distinguished
    name, RSA modulus, RSA exponent) plus one entry in the trust anchor array.
    Only RSA public keys are handled: the modulus and exponent are read from
    the key's public numbers and every entry is written as BR_KEYTYPE_RSA.
    """
    certs = x509Certs if keep_dupes else filter_duplicate_x509(x509Certs)
    # Save cert data as a C style header.
    # start by building each component
    static_arrays = []
    CAs = []
    cert_desc = []
    for i, cert in enumerate(certs):
        dn_name = DN_PRE + str(i)
        rsa_n_name = RSA_N_PRE + str(i)
        rsa_e_name = RSA_E_PRE + str(i)
        # add a description of the certificate to the array
        cert_desc.append(decribe_cert_object(cert, i))
        # distinguished name of the subject, DER encoded
        static_arrays.append(_byte_array_definition(dn_name, cert.get_subject().der()))
        # next, the RSA public numbers
        pubkey = cert.get_pubkey()
        numbers = pubkey.to_cryptography_key().public_numbers()
        # the modulus, big-endian; round the byte count up so a key size that
        # is not a multiple of 8 bits still fits
        n_length = (pubkey.bits() + 7) // 8
        static_arrays.append(_byte_array_definition(
            rsa_n_name, numbers.n.to_bytes(n_length, byteorder="big")))
        # and then the exponent, in as few bytes as it needs
        e_length = (numbers.e.bit_length() + 7) // 8
        static_arrays.append(_byte_array_definition(
            rsa_e_name, numbers.e.to_bytes(e_length, byteorder="big")))
        # format the root certificate entry
        CAs.append(CROOTCA_TEMPLATE.format(
            ta_dn_name=dn_name,
            rsa_number_name=rsa_n_name,
            rsa_exp_name=rsa_e_name))
    # concatenate it all into the big header file template
    # cert descriptions
    cert_desc_out = '\n * \n'.join(cert_desc)
    # static arrays, followed by the trust anchor array that references them
    cert_data_out = '\n\n'.join(static_arrays)
    cert_data_out += '\n\n' + CRAY_TEMPLATE.format(
        ray_type="br_x509_trust_anchor",
        ray_name=cert_var,
        ray_data='\n'.join(CAs))
    # create final header file
    output_file.write(CFILE_TEMPLATE.format(
        guard_name=_header_guard_name(output_file),
        cert_description=cert_desc_out,
        cert_length_var=cert_length_var,
        cert_length=str(len(certs)),
        cert_data=cert_data_out,
    ))

View file

@ -0,0 +1,125 @@
# Python SSL certificate conversion tool.
# Downloads and converts SSL certs from PEM format into a C header that can be
# referenced from a sketch to load the certificate data (in binary DER format).
# Modified by the OPEnS lab to output certificate data in a format supported by
# BearSSL.
# Author: Tony DiCola, Modified by Noah Koontz
#
# Dependencies:
# click - Install with 'sudo pip install click' (omit sudo on windows)
# PyOpenSSL - See homepage: https://pyopenssl.readthedocs.org/en/latest/
# Should just be a 'sudo pip install pyopenssl' command, HOWEVER
# on Windows you probably need a precompiled binary version. Try
# installing with pip and if you see errors when running that
# OpenSSL can't be found then try installing egenix's prebuilt
# PyOpenSSL library and OpenSSL lib:
# http://www.egenix.com/products/python/pyOpenSSL/
#
import cert_util
import click
import certifi
# Default name for the #define holding the number of certificates
CERT_LENGTH_NAME = "TAs_NUM"
# Default name for the trust anchor array variable
CERT_ARRAY_NAME = "TAs"
# Click setup and commands:
@click.group()
def pycert_bearssl():
    """OPEnS Python Certificate Tool
    This is a tool to download and convert SSL certificates and certificate
    chains into a C header format that can be imported into BearSSL
    """
    # Intentionally empty: this function only anchors the command group, and
    # its docstring is what click shows as the tool's help text. The actual
    # work lives in the sub-commands registered on it.
@pycert_bearssl.command(short_help='Download SSL certs and save as a C header.')
@click.option('--port', '-p', type=click.INT, default=443,
              help='port to use for reading certificate (default 443, SSL)')
@click.option('--cert-var', '-c', default=CERT_ARRAY_NAME,
              help='name of the variable in the header which will contain certificate data (default: {0})'.format(CERT_ARRAY_NAME))
@click.option('--cert-length-var', '-l', default=CERT_LENGTH_NAME,
              help='name of the define in the header which will contain the length of the certificate data (default: {0})'.format(CERT_LENGTH_NAME))
@click.option('--output', '-o', type=click.File('w'), default='certificates.h',
              help='name of the output file (default: certificates.h)')
@click.option('--use-store', '-s', type=click.File('r'), default=certifi.where(),
              help='the location of the .pem file containing a list of trusted root certificates (default: use certifi.where())')
@click.option('--keep-dupes', '-d', is_flag=True, default=False,
              help='write all certs including any duplicates across domains (default: remove duplicates)')
@click.argument('domain', nargs=-1)
def download(port, cert_var, cert_length_var, output, use_store, keep_dupes, domain):
    """Download the SSL certificates for specified domain(s) and save them as a C
    header file that can be imported into a sketch.
    Provide at least one argument that is the domain to query for its SSL
    certificate, for example google.com for Google's SSL certificate. You can
    provide any number of domains as additional arguments. All of the certificates
    will be combined into a single output header.
    By default the file 'certificates.h' will be created, however you can change
    the name of the file with the --output option.
    If a chain of certificates is retrieved then only the matching trusted root
    certificate from the certificate store will be saved.
    Example of downloading google.com's SSL certificate and storing it in
    certificates.h:
    pycert download google.com
    Example of downloading google.com and adafruit.com's SSL certificates and
    storing them in data.h:
    pycert download --output data.h google.com adafruit.com
    Note that the certificates will be validated before they are downloaded!
    """
    # NOTE: the docstring above is the command's help text. It no longer
    # mentions --full-chain, which this command does not define.
    # prepare the root certificate store, indexed by subject name hash
    cert_obj_store = cert_util.parse_root_certificate_store(use_store)
    cert_dict = {cert.get_subject().hash(): cert for cert in cert_obj_store}
    # Look up the root certificate for each provided domain.
    down_certs = []
    for d in domain:
        # Connect to the server and find the root certificate in the store
        # that issued the last certificate of the chain it presents.
        cert = cert_util.get_server_root_cert(d, port, cert_dict)
        if cert is None:
            raise click.ClickException('Could not download and/or validate the certificate for {0} port {1}!'.format(d, port))
        click.echo('Retrieved certificate for {0}'.format(d))
        # append cert to array
        down_certs.append(cert)
    # Combine the certificates and write output header.
    cert_util.x509_to_header(down_certs, cert_var, cert_length_var, output, keep_dupes)
@pycert_bearssl.command(short_help='Convert PEM certs into a C header.')
@click.option('--cert-var', '-c', default=CERT_ARRAY_NAME,
              help='name of the variable in the header which will contain certificate data (default: {0})'.format(CERT_ARRAY_NAME))
@click.option('--cert-length-var', '-l', default=CERT_LENGTH_NAME,
              help='name of the define in the header which will contain the length of the certificate data (default: {0})'.format(CERT_LENGTH_NAME))
@click.option('--output', '-o', type=click.File('w'), default='certificates.h',
              help='name of the output file (default: certificates.h)')
@click.option('--full-chain', '-f', is_flag=True, default=False,
              help='use the full certificate chain and not just the root/last cert (default: false, root cert only)')
@click.option('--keep-dupes', '-d', is_flag=True, default=False,
              help='write all certs including any duplicates (default: remove duplicates)')
@click.argument('cert', type=click.File('r'), nargs=-1)
def convert(cert_var, cert_length_var, output, full_chain, keep_dupes, cert):
    """Convert PEM certificates into a C header that can be imported into a
    sketch. Specify each certificate to encode as a separate argument (each
    must be in PEM format) and they will be merged into a single file.
    By default the file 'certificates.h' will be created, however you can change
    the name of the file with the --output option.
    If a chain of certificates is found then only the root certificate (i.e.
    the last in the chain) will be saved. However you can override this and
    force the full chain to be saved with the --full-chain option.
    Example of converting a foo.pem certificate into a certificates.h header:
    pycert convert foo.pem
    Example of converting foo.pem and bar.pem certificates into data.h:
    pycert convert --output data.h foo.pem bar.pem
    """
    # Load all the provided PEM files as X509 objects.
    x509_certs = []
    for c in cert:
        # parse_root_certificate_store reads the open file, splits it into
        # individual PEM certificates and parses each one, which is exactly
        # what is needed for an arbitrary PEM file or chain as well.
        chain = cert_util.parse_root_certificate_store(c)
        if not chain:
            raise click.ClickException('No PEM certificates found in {0}'.format(c.name))
        click.echo('Loaded certificate {0}'.format(c.name))
        # Keep the whole chain on request, otherwise only its last (root) cert.
        x509_certs.extend(chain if full_chain else chain[-1:])
    # Combine the certificates and write output header.
    cert_util.x509_to_header(x509_certs, cert_var, cert_length_var, output, keep_dupes)
# Run the click command group when this file is executed as a script.
if __name__ == '__main__':
    pycert_bearssl()