In [2]:
### In this box, I open the file "Sample_redone.txt", which is the hl7 sample file
### with all the values put into one line with the appropriate "/m" placements. The
### "r" in the open function means read only.

import numpy as np ### For later use

### Open the file inside a "with" block so the file handle is closed as soon
### as the contents have been read, even if read() raises an error.
with open("Sample_redone.txt", "r") as step_1:
    step_2 = step_1.read()

In [3]:
### In this box, I split the original message into segments by splitting
### anywhere there's an "/m" in the message

### Cut the raw message text at every "/m" marker; each piece is one list item.
step_3 = step_2.split("/m")
step_3

### Notice the result is a list with 6 items (each segment)


Out[3]:
['MSH|^~\\&|CERNER||PriorityHealth||||ORU^R01|Q479004375T431430612|P|2.3',
 'PID|||001677980||SMITH^CURTIS||19680219|M||||||||||929645156318|123456789',
 'PD1||||1234567890^LAST^FIRST^M^^^^^NPI',
 'OBR|1|341856649^HNAM_ORDERID|000002006326002362|648088^Basic Metabolic Panel||',
 '20061122151600|||||||||1620^Hooker^Robert^L||||||20061122154733|||F|||||||||||20061122140000',
 'OBX|1|NM|GLU^Glucose Lvl|59|mg/dL|65-99^65^99|L|||F|||20061122154733/n']

In [4]:
### In this box, I separated each segment into individual fields using
### "|" as the split indicator.

### Split every item of step_3 on "|" so each one becomes a list of fields.
step_4 = [segment.split("|") for segment in step_3]
step_4

### Notice the result is now a list of lists. 6 lists are now organized
### into a larger list.


Out[4]:
[['MSH',
  '^~\\&',
  'CERNER',
  '',
  'PriorityHealth',
  '',
  '',
  '',
  'ORU^R01',
  'Q479004375T431430612',
  'P',
  '2.3'],
 ['PID',
  '',
  '',
  '001677980',
  '',
  'SMITH^CURTIS',
  '',
  '19680219',
  'M',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '929645156318',
  '123456789'],
 ['PD1', '', '', '', '1234567890^LAST^FIRST^M^^^^^NPI'],
 ['OBR',
  '1',
  '341856649^HNAM_ORDERID',
  '000002006326002362',
  '648088^Basic Metabolic Panel',
  '',
  ''],
 ['20061122151600',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '1620^Hooker^Robert^L',
  '',
  '',
  '',
  '',
  '',
  '20061122154733',
  '',
  '',
  'F',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '',
  '20061122140000'],
 ['OBX',
  '1',
  'NM',
  'GLU^Glucose Lvl',
  '59',
  'mg/dL',
  '65-99^65^99',
  'L',
  '',
  '',
  'F',
  '',
  '',
  '20061122154733/n']]

In [5]:
### All I do in this box is take the result of step_4 (list of lists)
### and turn it into an array. (I need to do this in order to run
### step 6)

### The inner lists of step_4 have different lengths (a "ragged" nested list).
### Ask for dtype=object explicitly: NumPy 1.24 and later raise a ValueError
### when asked to build an array from ragged nested lists without it, while
### older releases silently fell back to an object array.
step_5 = np.asarray(step_4, dtype=object)
step_5

### Notice the "np." I'm calling the package numpy which I imported earlier.


Out[5]:
array([ ['MSH', '^~\\&', 'CERNER', '', 'PriorityHealth', '', '', '', 'ORU^R01', 'Q479004375T431430612', 'P', '2.3'],
       ['PID', '', '', '001677980', '', 'SMITH^CURTIS', '', '19680219', 'M', '', '', '', '', '', '', '', '', '', '929645156318', '123456789'],
       ['PD1', '', '', '', '1234567890^LAST^FIRST^M^^^^^NPI'],
       ['OBR', '1', '341856649^HNAM_ORDERID', '000002006326002362', '648088^Basic Metabolic Panel', '', ''],
       ['20061122151600', '', '', '', '', '', '', '', '', '1620^Hooker^Robert^L', '', '', '', '', '', '20061122154733', '', '', 'F', '', '', '', '', '', '', '', '', '', '', '20061122140000'],
       ['OBX', '1', 'NM', 'GLU^Glucose Lvl', '59', 'mg/dL', '65-99^65^99', 'L', '', '', 'F', '', '', '20061122154733/n']], dtype=object)

In [6]:
### In order to run the next bit of code, understand that step_5 followed
### by a number in brackets, [#], means I'm running the code only on the #th
### component of the array. So step_5[1] would be the SECOND component
### of the array. (Remember, Python uses a 0-based numbering system.)

### For each of the first six items of step_5, split every field on "^".
### Each result is a list with one list of pieces per field.
step_6a = [field.split("^") for field in step_5[0]]
step_6b = [field.split("^") for field in step_5[1]]
step_6c = [field.split("^") for field in step_5[2]]
step_6d = [field.split("^") for field in step_5[3]]
step_6e = [field.split("^") for field in step_5[4]]
step_6f = [field.split("^") for field in step_5[5]]

### Side-note: If anyone could create a function which completes 
### step_6 and step_7 given an array of arrays of n length, that
### would move us towards solving a good part of this project.

In [7]:
### Now I combine the results of step 6 into one list.

def split_components(segments, sep="^"):
    """Split every field of every segment on ``sep``.

    Parameters
    ----------
    segments : iterable of iterables of str
        One inner iterable of field strings per segment.
    sep : str, optional
        Separator each field is split on. Defaults to "^".

    Returns
    -------
    list of list of list of str
        ``result[i][j]`` holds the pieces of field ``j`` of segment ``i``.
    """
    return [[field.split(sep) for field in segment] for segment in segments]


### Build the nested result straight from step_5 so it covers however many
### segments the message contains instead of exactly six.
step_7 = split_components(step_5)

### Play around pulling different segments to check the work of the functions
### using the brackets, e.g. (step_7[0]) or (step_7[4]), etc. ->

# step_7[0]
# step_7[3]
# step_7[2]

In [8]:
### Notice the results of step_7 yield a list of lists of lists.
### Now segments, fields, and subcomponents are correctly ordered in the list.


### The next bit of code is the beginning of turning step_7 into a
### dictionary of key:value pairs. The key is its placement,
### e.g. (Segment 1, Field 6, Subfield 2, etc.), and the value is the
### corresponding value in the array, e.g. ('MSH', 'PriorityHealth', etc.).

In [9]:
### Two functions I created to help turn this into a dictionary. 
### function_segment(y) first creates a list of KEYS the size of y, then it
### binds this list to the values fed in the function for y. See what this
### produces when y is step_7

def function_segment(y):
    """Label the items of ``y`` as "Segment1", "Segment2", ... in order.

    Parameters
    ----------
    y : sequence
        Items to label. It must support ``len()``.

    Returns
    -------
    dict
        Maps "Segment<n>" (numbering starts at 1) to the n-th item of ``y``.
    """
    labels = ("Segment" + str(position) for position in range(1, len(y) + 1))
    return dict(zip(labels, y))

### Label every top-level item of step_7 as "Segment1", "Segment2", ...
### and display the resulting dictionary.
step_8 = function_segment(step_7)
step_8

### Notice each segment has been successfully named


Out[9]:
{'Segment1': [['MSH'],
  ['', '~\\&'],
  ['CERNER'],
  [''],
  ['PriorityHealth'],
  [''],
  [''],
  [''],
  ['ORU', 'R01'],
  ['Q479004375T431430612'],
  ['P'],
  ['2.3']],
 'Segment2': [['PID'],
  [''],
  [''],
  ['001677980'],
  [''],
  ['SMITH', 'CURTIS'],
  [''],
  ['19680219'],
  ['M'],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  ['929645156318'],
  ['123456789']],
 'Segment3': [['PD1'],
  [''],
  [''],
  [''],
  ['1234567890', 'LAST', 'FIRST', 'M', '', '', '', '', 'NPI']],
 'Segment4': [['OBR'],
  ['1'],
  ['341856649', 'HNAM_ORDERID'],
  ['000002006326002362'],
  ['648088', 'Basic Metabolic Panel'],
  [''],
  ['']],
 'Segment5': [['20061122151600'],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  ['1620', 'Hooker', 'Robert', 'L'],
  [''],
  [''],
  [''],
  [''],
  [''],
  ['20061122154733'],
  [''],
  [''],
  ['F'],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  [''],
  ['20061122140000']],
 'Segment6': [['OBX'],
  ['1'],
  ['NM'],
  ['GLU', 'Glucose Lvl'],
  ['59'],
  ['mg/dL'],
  ['65-99', '65', '99'],
  ['L'],
  [''],
  [''],
  ['F'],
  [''],
  [''],
  ['20061122154733/n']]}

In [19]:
### This next function is exactly the same as the last; it
### just names things "Field#" instead of "Segment#"

def function_field(y):
    """Label the items of ``y`` as "Field1", "Field2", ... in order.

    Parameters
    ----------
    y : sequence
        Items to label. It must support ``len()``.

    Returns
    -------
    dict
        Maps "Field<n>" (numbering starts at 1) to the n-th item of ``y``.
    """
    labels = ("Field" + str(position) for position in range(1, len(y) + 1))
    return dict(zip(labels, y))

### Now I'm going to import each SEGMENT from step_8 into this function
### at a future date I will create a program to do this (and hopefully step_6)
### but for now, it's manual

### Run function_field over every segment stored in step_8. Each segment is
### looked up by its "Segment<n>" key, so the order is explicit and the last
### segment is no longer left out (the hand-written version stopped at
### Segment5).
step_9 = tuple(
    function_field(step_8["Segment" + str(number)])
    for number in range(1, len(step_8) + 1)
)

### And look at that! It erases the Segment numbers! Wonderful, well
### I'll work on fixing that up later. This is where I stop. I have
### a half a dozen boxes more of code throughout this that I'm working with,
### but I only wanted to include what was actually working. Add, subtract, or
### do whatever possible to make this better. Thanks guys!

In [20]:
### try number 2!

### Name the fields of every segment in step_7 (however many there are),
### then name the segments themselves so both levels of keys are kept.
step_9b = tuple(function_field(segment) for segment in step_7)

step_9c = function_segment(step_9b)
step_9c


Out[20]:
{'Segment1': {'Field1': ['MSH'],
  'Field10': ['Q479004375T431430612'],
  'Field11': ['P'],
  'Field12': ['2.3'],
  'Field2': ['', '~\\&'],
  'Field3': ['CERNER'],
  'Field4': [''],
  'Field5': ['PriorityHealth'],
  'Field6': [''],
  'Field7': [''],
  'Field8': [''],
  'Field9': ['ORU', 'R01']},
 'Segment2': {'Field1': ['PID'],
  'Field10': [''],
  'Field11': [''],
  'Field12': [''],
  'Field13': [''],
  'Field14': [''],
  'Field15': [''],
  'Field16': [''],
  'Field17': [''],
  'Field18': [''],
  'Field19': ['929645156318'],
  'Field2': [''],
  'Field20': ['123456789'],
  'Field3': [''],
  'Field4': ['001677980'],
  'Field5': [''],
  'Field6': ['SMITH', 'CURTIS'],
  'Field7': [''],
  'Field8': ['19680219'],
  'Field9': ['M']},
 'Segment3': {'Field1': ['PD1'],
  'Field2': [''],
  'Field3': [''],
  'Field4': [''],
  'Field5': ['1234567890', 'LAST', 'FIRST', 'M', '', '', '', '', 'NPI']},
 'Segment4': {'Field1': ['OBR'],
  'Field2': ['1'],
  'Field3': ['341856649', 'HNAM_ORDERID'],
  'Field4': ['000002006326002362'],
  'Field5': ['648088', 'Basic Metabolic Panel'],
  'Field6': [''],
  'Field7': ['']},
 'Segment5': {'Field1': ['20061122151600'],
  'Field10': ['1620', 'Hooker', 'Robert', 'L'],
  'Field11': [''],
  'Field12': [''],
  'Field13': [''],
  'Field14': [''],
  'Field15': [''],
  'Field16': ['20061122154733'],
  'Field17': [''],
  'Field18': [''],
  'Field19': ['F'],
  'Field2': [''],
  'Field20': [''],
  'Field21': [''],
  'Field22': [''],
  'Field23': [''],
  'Field24': [''],
  'Field25': [''],
  'Field26': [''],
  'Field27': [''],
  'Field28': [''],
  'Field29': [''],
  'Field3': [''],
  'Field30': ['20061122140000'],
  'Field4': [''],
  'Field5': [''],
  'Field6': [''],
  'Field7': [''],
  'Field8': [''],
  'Field9': ['']},
 'Segment6': {'Field1': ['OBX'],
  'Field10': [''],
  'Field11': ['F'],
  'Field12': [''],
  'Field13': [''],
  'Field14': ['20061122154733/n'],
  'Field2': ['1'],
  'Field3': ['NM'],
  'Field4': ['GLU', 'Glucose Lvl'],
  'Field5': ['59'],
  'Field6': ['mg/dL'],
  'Field7': ['65-99', '65', '99'],
  'Field8': ['L'],
  'Field9': ['']}}

In [ ]: