In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# Sample HL7 v2 ADT messages used as test input. Fields are separated by "|";
# segments appear to be terminated by "\r" (see the split output below).
# The encoding characters are spelled "^~\\&" so the backslash is an explicit
# escape instead of the invalid escape sequence "\&" (which newer Python
# versions warn about). The runtime value of every string is unchanged.
msgtxt_1 = "MSH|^~\\&|ADT1|MCM|LABADT|MCM|198808181126|SECURITY|ADT^A04|MSG00001|P|2.4\rEVN|A01-|198808181123\rPID|||PATID1234^5^M11||JONES^WILLIAM^A^III||19610615|M-||2106-3|1200 N ELM STREET^^GREENSBORO^NC^27401-1020|GL|(919)379-1212|(919)271-3434~(919)277-3114||S||PATID12345001^2^M10|123456789|9-87654^NC\rNK1|1|JONES^BARBARA^K|SPO|||||20011105\rNK1|1|JONES^MICHAEL^A|FTH\rPV1|1|I|2000^2012^01||||004777^LEBAUER^SIDNEY^J.|||SUR||-||1|A0-\rAL1|1||^PENICILLIN||PRODUCES HIVES~RASH\rAL1|2||^CAT DANDER\rDG1|001|I9|1550|MAL NEO LIVER, PRIMARY|19880501103005|F||\rPR1|2234|M11|111^CODE151|COMMON PROCEDURES|198809081123\rROL|45^RECORDER^ROLE MASTER LIST|AD|CP|KATE^SMITH^ELLEN|199505011201\rGT1|1122|1519|BILL^GATES^A\rIN1|001|A357|1234|BCMD|||||132987\rIN2|ID1551001|SSN12345678\rROL|45^RECORDER^ROLE MASTER LIST|AD|CP|KATE^ELLEN|199505011201"
msgtxt_2 = "MSH|^~\\&|AccMgr|1|||20050110045504||ADT^A05|599102|P|2.3|||\rEVN|A01|20050110045502|||||\rPID|1||10006579^^^1^MRN^1||DUCK^DONALD^D||19241010|M||1|111 DUCK ST^^FOWL^CA^999990000^^M|1|8885551212|8885551212|1|2||40007716^^^AccMgr^VN^1|123121234|||||||||||NO\rNK1|1|DUCK^HUEY|SO|3583 DUCK RD^^FOWL^CA^999990000|8885552222||Y||||||||||||||\rPV1|1|I|PREOP^101^1^1^^^S|3|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|||01||||1|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|2|40007716^^^AccMgr^VN|4|||||||||||||||||||1||G|||20050110045253||||||\rGT1|1|8291|DUCK^DONALD^D||111^DUCK ST^^FOWL^CA^999990000|8885551212||19241010|M||1|123121234||||#Cartoon Ducks Inc|111^DUCK ST^^FOWL^CA^999990000|8885551212||PT|\rDG1|1|I9|71596^OSTEOARTHROS NOS-L/LEG ^I9|OSTEOARTHROS NOS-L/LEG ||A|\rIN1|1|MEDICARE|3|MEDICARE|||||||Cartoon Ducks Inc|19891001|||4|DUCK^DONALD^D|1|19241010|111^DUCK ST^^FOWL^CA^999990000|||||||||||||||||123121234A||||||PT|M|111 DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|1||123121234|Cartoon Ducks Inc|||123121234A|||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|2|NON-PRIMARY|9|MEDICAL MUTUAL CALIF.|PO BOX 94776^^HOLLYWOOD^CA^441414776||8003621279|PUBSUMB|||Cartoon Ducks Inc||||7|DUCK^DONALD^D|1|19241010|111 DUCK ST^^FOWL^CA^999990000|||||||||||||||||056269770||||||PT|M|111^DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|2||123121234|Cartoon Ducks Inc||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|3|SELF PAY|1|SELF PAY|||||||||||5||1"
msgtxt_3 = "MSH|^~\\&|AccMgr|1|||20050110045504||ADT^A08|599102|P|2.3|||\rEVN|A01|20050110045502|||||\rPID|1||10006579^^^1^MRN^1||DUCK^DONALD^D||19241010|M||1|111 DUCK ST^^FOWL^CA^999990000^^M|1|8885551212|8885551212|1|2||40007716^^^AccMgr^VN^1|123121234|||||||||||NO\rNK1|1|DUCK^HUEY|SO|3583 DUCK RD^^FOWL^CA^999990000|8885552222||Y||||||||||||||\rPV1|1|I|PREOP^101^1^1^^^S|3|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|||01||||1|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|2|40007716^^^AccMgr^VN|4|||||||||||||||||||1||G|||20050110045253||||||\rGT1|1|8291|DUCK^DONALD^D||111^DUCK ST^^FOWL^CA^999990000|8885551212||19241010|M||1|123121234||||#Cartoon Ducks Inc|111^DUCK ST^^FOWL^CA^999990000|8885551212||PT|\rDG1|1|I9|71596^OSTEOARTHROS NOS-L/LEG ^I9|OSTEOARTHROS NOS-L/LEG ||A|\rIN1|1|MEDICARE|3|MEDICARE|||||||Cartoon Ducks Inc|19891001|||4|DUCK^DONALD^D|1|19241010|111^DUCK ST^^FOWL^CA^999990000|||||||||||||||||123121234A||||||PT|M|111 DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|1||123121234|Cartoon Ducks Inc|||123121234A|||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|2|NON-PRIMARY|9|MEDICAL MUTUAL CALIF.|PO BOX 94776^^HOLLYWOOD^CA^441414776||8003621279|PUBSUMB|||Cartoon Ducks Inc||||7|DUCK^DONALD^D|1|19241010|111 DUCK ST^^FOWL^CA^999990000|||||||||||||||||056269770||||||PT|M|111^DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|2||123121234|Cartoon Ducks Inc||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|3|SELF PAY|1|SELF PAY|||||||||||5||1"

# Proof of concept: split the first message on the field separator only.
# NOTE: the "\r" segment terminators are not handled here, so the last field
# of one segment stays fused with the ID of the next (e.g. "2.4\rEVN").
msgfields_1 = msgtxt_1.split(sep="|")

# Type sanity checks. A bare `type(...)` in the middle of a cell displays
# nothing, so assert instead. The original checked an undefined name
# (`msgfields`), which raises NameError on a fresh kernel.
assert isinstance(msgtxt_1, str)
assert isinstance(msgfields_1, list)

## I need to turn the list into a Series in order to put it into a DataFrame (DF).
## The number of fields determines the DF index: this message yields 105 fields.
## `len(msgfields_1)` reports that count directly, so no hand counting is needed.

msgfields_1


Out[81]:
['MSH',
 '^~\\&',
 'ADT1',
 'MCM',
 'LABADT',
 'MCM',
 '198808181126',
 'SECURITY',
 'ADT^A04',
 'MSG00001',
 'P',
 '2.4\rEVN',
 'A01-',
 '198808181123\rPID',
 '',
 '',
 'PATID1234^5^M11',
 '',
 'JONES^WILLIAM^A^III',
 '',
 '19610615',
 'M-',
 '',
 '2106-3',
 '1200 N ELM STREET^^GREENSBORO^NC^27401-1020',
 'GL',
 '(919)379-1212',
 '(919)271-3434~(919)277-3114',
 '',
 'S',
 '',
 'PATID12345001^2^M10',
 '123456789',
 '9-87654^NC\rNK1',
 '1',
 'JONES^BARBARA^K',
 'SPO',
 '',
 '',
 '',
 '',
 '20011105\rNK1',
 '1',
 'JONES^MICHAEL^A',
 'FTH\rPV1',
 '1',
 'I',
 '2000^2012^01',
 '',
 '',
 '',
 '004777^LEBAUER^SIDNEY^J.',
 '',
 '',
 'SUR',
 '',
 '-',
 '',
 '1',
 'A0-\rAL1',
 '1',
 '',
 '^PENICILLIN',
 '',
 'PRODUCES HIVES~RASH\rAL1',
 '2',
 '',
 '^CAT DANDER\rDG1',
 '001',
 'I9',
 '1550',
 'MAL NEO LIVER, PRIMARY',
 '19880501103005',
 'F',
 '',
 '\rPR1',
 '2234',
 'M11',
 '111^CODE151',
 'COMMON PROCEDURES',
 '198809081123\rROL',
 '45^RECORDER^ROLE MASTER LIST',
 'AD',
 'CP',
 'KATE^SMITH^ELLEN',
 '199505011201\rGT1',
 '1122',
 '1519',
 'BILL^GATES^A\rIN1',
 '001',
 'A357',
 '1234',
 'BCMD',
 '',
 '',
 '',
 '',
 '132987\rIN2',
 'ID1551001',
 'SSN12345678\rROL',
 '45^RECORDER^ROLE MASTER LIST',
 'AD',
 'CP',
 'KATE^ELLEN',
 '199505011201']

In [86]:
## References consulted while exploring:
##   http://python-hl7.readthedocs.io/en/latest/index.html#result-tree
##       - could not get the parser to work
##   Message[segment][field][repetition][component][sub-component]
##       - did not like this addressing scheme
##   http://pandas.pydata.org/pandas-docs/stable/dsintro.html

## Wrap the list of fields in a Series. With no explicit index, pandas assigns
## a default integer index starting at 0, so the positions (the 105 entries
## counted earlier) never have to be typed out by hand.
s = pd.Series(msgfields_1)

## Proof of concept only: now that the fields are a Series they should be
## usable as input for a DataFrame. Sub-segments will be dealt with later,
## if this approach works.
s


Out[86]:
0                                              MSH
1                                             ^~\&
2                                             ADT1
3                                              MCM
4                                           LABADT
5                                              MCM
6                                     198808181126
7                                         SECURITY
8                                          ADT^A04
9                                         MSG00001
10                                               P
11                                        2.4\rEVN
12                                            A01-
13                               198808181123\rPID
14                                                
15                                                
16                                 PATID1234^5^M11
17                                                
18                             JONES^WILLIAM^A^III
19                                                
20                                        19610615
21                                              M-
22                                                
23                                          2106-3
24     1200 N ELM STREET^^GREENSBORO^NC^27401-1020
25                                              GL
26                                   (919)379-1212
27                     (919)271-3434~(919)277-3114
28                                                
29                                               S
                          ...                     
75                                           \rPR1
76                                            2234
77                                             M11
78                                     111^CODE151
79                               COMMON PROCEDURES
80                               198809081123\rROL
81                    45^RECORDER^ROLE MASTER LIST
82                                              AD
83                                              CP
84                                KATE^SMITH^ELLEN
85                               199505011201\rGT1
86                                            1122
87                                            1519
88                               BILL^GATES^A\rIN1
89                                             001
90                                            A357
91                                            1234
92                                            BCMD
93                                                
94                                                
95                                                
96                                                
97                                     132987\rIN2
98                                       ID1551001
99                                SSN12345678\rROL
100                   45^RECORDER^ROLE MASTER LIST
101                                             AD
102                                             CP
103                                     KATE^ELLEN
104                                   199505011201
dtype: object

In [90]:
## Reference: http://pandas.pydata.org/pandas-docs/stable/dsintro.html
## The docs build a DataFrame from a dict of Series:
##   d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
##        'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
##   df = pd.DataFrame(d)
## That pattern will be handy for combining MULTIPLE messages (read: lists)
## into one DataFrame.

## For now, build a single-column frame from the one Series.
df = pd.DataFrame(data=s)
df


Out[90]:
0
0 MSH
1 ^~\&
2 ADT1
3 MCM
4 LABADT
5 MCM
6 198808181126
7 SECURITY
8 ADT^A04
9 MSG00001
10 P
11 2.4\rEVN
12 A01-
13 198808181123\rPID
14
15
16 PATID1234^5^M11
17
18 JONES^WILLIAM^A^III
19
20 19610615
21 M-
22
23 2106-3
24 1200 N ELM STREET^^GREENSBORO^NC^27401-1020
25 GL
26 (919)379-1212
27 (919)271-3434~(919)277-3114
28
29 S
... ...
75 \rPR1
76 2234
77 M11
78 111^CODE151
79 COMMON PROCEDURES
80 198809081123\rROL
81 45^RECORDER^ROLE MASTER LIST
82 AD
83 CP
84 KATE^SMITH^ELLEN
85 199505011201\rGT1
86 1122
87 1519
88 BILL^GATES^A\rIN1
89 001
90 A357
91 1234
92 BCMD
93
94
95
96
97 132987\rIN2
98 ID1551001
99 SSN12345678\rROL
100 45^RECORDER^ROLE MASTER LIST
101 AD
102 CP
103 KATE^ELLEN
104 199505011201

105 rows × 1 columns


In [93]:
## Reference: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html
##   DataFrame.to_json(path_or_buf=None, orient=None, date_format='epoch',
##                     double_precision=10, force_ascii=True, date_unit='ms',
##                     default_handler=None)
##   Convert the object to a JSON string.

## With no path given, the JSON text is returned as a string. "columns" is the
## default orientation for a DataFrame, giving {column -> {index -> value}}.
df.to_json(orient="columns")


Out[93]:
'{"0":{"0":"MSH","1":"^~\\\\&","2":"ADT1","3":"MCM","4":"LABADT","5":"MCM","6":"198808181126","7":"SECURITY","8":"ADT^A04","9":"MSG00001","10":"P","11":"2.4\\rEVN","12":"A01-","13":"198808181123\\rPID","14":"","15":"","16":"PATID1234^5^M11","17":"","18":"JONES^WILLIAM^A^III","19":"","20":"19610615","21":"M-","22":"","23":"2106-3","24":"1200 N ELM STREET^^GREENSBORO^NC^27401-1020","25":"GL","26":"(919)379-1212","27":"(919)271-3434~(919)277-3114","28":"","29":"S","30":"","31":"PATID12345001^2^M10","32":"123456789","33":"9-87654^NC\\rNK1","34":"1","35":"JONES^BARBARA^K","36":"SPO","37":"","38":"","39":"","40":"","41":"20011105\\rNK1","42":"1","43":"JONES^MICHAEL^A","44":"FTH\\rPV1","45":"1","46":"I","47":"2000^2012^01","48":"","49":"","50":"","51":"004777^LEBAUER^SIDNEY^J.","52":"","53":"","54":"SUR","55":"","56":"-","57":"","58":"1","59":"A0-\\rAL1","60":"1","61":"","62":"^PENICILLIN","63":"","64":"PRODUCES HIVES~RASH\\rAL1","65":"2","66":"","67":"^CAT DANDER\\rDG1","68":"001","69":"I9","70":"1550","71":"MAL NEO LIVER, PRIMARY","72":"19880501103005","73":"F","74":"","75":"\\rPR1","76":"2234","77":"M11","78":"111^CODE151","79":"COMMON PROCEDURES","80":"198809081123\\rROL","81":"45^RECORDER^ROLE MASTER LIST","82":"AD","83":"CP","84":"KATE^SMITH^ELLEN","85":"199505011201\\rGT1","86":"1122","87":"1519","88":"BILL^GATES^A\\rIN1","89":"001","90":"A357","91":"1234","92":"BCMD","93":"","94":"","95":"","96":"","97":"132987\\rIN2","98":"ID1551001","99":"SSN12345678\\rROL","100":"45^RECORDER^ROLE MASTER LIST","101":"AD","102":"CP","103":"KATE^ELLEN","104":"199505011201"}}'

In [ ]: