#!/usr/bin/env python
#
# icd10.py - Query the ICD10 disease coding hierarchy.
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
#
"""This module contains functions for working with the `ICD10
<https://en.wikipedia.org/wiki/ICD-10>`_ disease coding hierarchy.
The :func:`toNumeric` function will take an ICD10 coding, and return a numeric
variant of it.
The :func:`storeCodes` function allows sets of ICD10 codes to be stored
so that they can be saved out to a file via the :func:`saveCodes` function, at
a later stage.
"""
import logging
import string
import itertools as it
import functools as ft
import numpy as np
import pandas as pd
import funpack.util as util
import funpack.schema.hierarchy as hierarchy
log = logging.getLogger(__name__)
[docs]
def initialise(mgr=None):
    """Create (or reset) the storage which :func:`storeCodes` appends to.

    The storage is attached to the :func:`storeCodes` function as its
    ``store`` attribute, replacing any store that was there before.

    When this module is used with ``multiprocessing``, this function has to
    be called, and it has to be called *before* the ``multiprocessing.Pool``
    is created - the shared storage must exist before the worker processes
    are started, so that it is available in their address space.

    Calling this function is optional in a single-process context.

    :arg mgr: ``multiprocessing.Manager`` from which the shared storage is
              created (via ``mgr.list()``). If ``None``, an ordinary list
              is used, which will only work within-process.
    """
    storeCodes.store = [] if mgr is None else mgr.list()
[docs]
def storeCodes(codes):
    """Remember the given sequence of ICD10 codes so that they can be
    written out to file later on.

    Each call appends ``codes``, as a single entry, to a list which is kept
    in the ``store`` attribute of this function. The list is created on
    first use if it does not exist yet.

    :arg codes: Sequence of ICD10 codes to add to the mapping file
    """
    try:
        accumulated = storeCodes.store
    except AttributeError:
        # no store has been set up yet - start a fresh, in-process one
        accumulated = []

    accumulated.append(codes)
    storeCodes.store = accumulated
[docs]
def saveCodes(fname, hier, fields=None):
    """Saves any codes which have been stored via :func:`storeCodes` out to
    the specified file.

    The stored code sequences are flattened, missing values are dropped, and
    the remaining unique codes are sorted. One row is written per code, as a
    tab-separated file with a header row and no index column. Columns are
    written in the order given by ``fields``.

    :arg fname:  File to save the codes to.

    :arg hier:   :class:`.Hierarchy` object containing the ICD10
                 hierarchy information.

    :arg fields: Sequence of fields to include in the ``mapfile``. Defaults
                 to ``['code', 'value', 'description', 'parent_descs']``. May
                 contain any of the following:

                   - ``'code'``
                   - ``'value'``
                   - ``'description'``
                   - ``'parent_codes'``
                   - ``'parent_descs'``

    :raises ValueError: If ``fields`` contains an unrecognised field name.
    """

    if fields is None:
        fields = ['code', 'value', 'description', 'parent_descs']
    else:
        # Any sequence is accepted, but the column selection at the end
        # needs a list - pandas interprets a tuple passed to ``df[...]``
        # as a single column key rather than as a selection of columns.
        fields = list(fields)

    valid = ['code', 'value', 'description', 'parent_codes', 'parent_descs']

    if not all(f in valid for f in fields):
        raise ValueError('Invalid field in: {}'.format(fields))

    # The store is a list of code sequences (one
    # per storeCodes call) - flatten it, discard
    # missing values, and reduce to the sorted
    # set of unique codes.
    store = getattr(storeCodes, 'store', [])
    store = pd.Series(list(it.chain.from_iterable(store)), dtype=str)
    store = store[store.notna()]
    codes = np.sort(store.unique())

    def parent_codes(c):
        # comma-separated parent codes, in the reverse
        # of the order returned by hier.parents
        return ','.join(reversed(hier.parents(c)))

    def parent_descs(c):
        # space-separated "[description]" entries, one per
        # parent, in the same order as used by parent_codes
        parents = reversed(hier.parents(c))
        descs = [hier.description(p) for p in parents]
        return ' '.join(['[{}]'.format(d) for d in descs])

    # Function used to generate each derived
    # column from the 'code' column.
    generators = {
        'value': ft.partial(hierarchy.codeToNumeric, name='icd10'),
        'description': hier.description,
        'parent_codes': parent_codes,
        'parent_descs': parent_descs,
    }

    df = pd.DataFrame({'code': codes})

    for f in fields:
        # the 'code' column is already present
        if f != 'code':
            df[f] = df['code'].apply(generators[f])

    log.debug('Saving %u ICD10 codes to %s', len(df), fname)

    # only keep the requested columns, in the requested order
    df = df[fields]
    df.to_csv(fname, sep='\t', index=False)
[docs]
def toNumeric(code):
    """Converts an ICD10 alpha-numeric code to a unique integer number.

    This function may be used by the :func:`~.cleaning_functions.codeToNumeric`
    cleaning function.

    The code is processed one character at a time:

      - ASCII letters are replaced with their index in
        ``string.ascii_letters``, plus 10 (so ``'a'`` becomes ``10`` and
        ``'A'`` becomes ``36``). Every letter therefore contributes exactly
        two digits.
      - ASCII digits are kept as they are.
      - All other characters (e.g. ``'.'``) are ignored.

    The resulting digits are concatenated, and interpreted as a base 10
    integer.

    This function is intended to be used with ICD10 codes - if it is used to
    convert longer strings containing many letters, it may produce integer
    values that cannot be represented in 64 bits.

    See https://www.webpt.com/blog/understanding-icd-10-code-structure/ for an
    overview of the ICD10 coding scheme.

    :arg code: ICD10 code, as a string.

    :returns:  The numeric variant of ``code``, as an ``int``.

    :raises ValueError: If ``code`` does not contain any ASCII letters or
                        digits (e.g. it is an empty string).
    """

    letters = string.ascii_letters
    digits = string.digits

    # construct as a sequence of digit strings,
    # then convert to a number afterwards
    parts = []

    for ch in code:
        idx = letters.find(ch)
        if idx >= 0:
            parts.append(str(idx + 10))
        elif ch in digits:
            parts.append(ch)

    if len(parts) == 0:
        raise ValueError('Cannot convert ICD10 code {!r} to a number: it '
                         'contains no ASCII letters or digits'.format(code))

    return int(''.join(parts))