In [81]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
## Sample HL7 v2 ADT messages used as raw input for the parsing experiments below.
## Segments are separated by "\r" and fields by "|". The MSH-2 encoding characters contain a
## literal backslash, written here as "\\&" so that Python does not treat "\&" as an (invalid)
## escape sequence; the resulting string value is unchanged.
msgtxt_1 = "MSH|^~\\&|ADT1|MCM|LABADT|MCM|198808181126|SECURITY|ADT^A04|MSG00001|P|2.4\rEVN|A01-|198808181123\rPID|||PATID1234^5^M11||JONES^WILLIAM^A^III||19610615|M-||2106-3|1200 N ELM STREET^^GREENSBORO^NC^27401-1020|GL|(919)379-1212|(919)271-3434~(919)277-3114||S||PATID12345001^2^M10|123456789|9-87654^NC\rNK1|1|JONES^BARBARA^K|SPO|||||20011105\rNK1|1|JONES^MICHAEL^A|FTH\rPV1|1|I|2000^2012^01||||004777^LEBAUER^SIDNEY^J.|||SUR||-||1|A0-\rAL1|1||^PENICILLIN||PRODUCES HIVES~RASH\rAL1|2||^CAT DANDER\rDG1|001|I9|1550|MAL NEO LIVER, PRIMARY|19880501103005|F||\rPR1|2234|M11|111^CODE151|COMMON PROCEDURES|198809081123\rROL|45^RECORDER^ROLE MASTER LIST|AD|CP|KATE^SMITH^ELLEN|199505011201\rGT1|1122|1519|BILL^GATES^A\rIN1|001|A357|1234|BCMD|||||132987\rIN2|ID1551001|SSN12345678\rROL|45^RECORDER^ROLE MASTER LIST|AD|CP|KATE^ELLEN|199505011201"
msgtxt_2 = "MSH|^~\\&|AccMgr|1|||20050110045504||ADT^A05|599102|P|2.3|||\rEVN|A01|20050110045502|||||\rPID|1||10006579^^^1^MRN^1||DUCK^DONALD^D||19241010|M||1|111 DUCK ST^^FOWL^CA^999990000^^M|1|8885551212|8885551212|1|2||40007716^^^AccMgr^VN^1|123121234|||||||||||NO\rNK1|1|DUCK^HUEY|SO|3583 DUCK RD^^FOWL^CA^999990000|8885552222||Y||||||||||||||\rPV1|1|I|PREOP^101^1^1^^^S|3|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|||01||||1|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|2|40007716^^^AccMgr^VN|4|||||||||||||||||||1||G|||20050110045253||||||\rGT1|1|8291|DUCK^DONALD^D||111^DUCK ST^^FOWL^CA^999990000|8885551212||19241010|M||1|123121234||||#Cartoon Ducks Inc|111^DUCK ST^^FOWL^CA^999990000|8885551212||PT|\rDG1|1|I9|71596^OSTEOARTHROS NOS-L/LEG ^I9|OSTEOARTHROS NOS-L/LEG ||A|\rIN1|1|MEDICARE|3|MEDICARE|||||||Cartoon Ducks Inc|19891001|||4|DUCK^DONALD^D|1|19241010|111^DUCK ST^^FOWL^CA^999990000|||||||||||||||||123121234A||||||PT|M|111 DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|1||123121234|Cartoon Ducks Inc|||123121234A|||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|2|NON-PRIMARY|9|MEDICAL MUTUAL CALIF.|PO BOX 94776^^HOLLYWOOD^CA^441414776||8003621279|PUBSUMB|||Cartoon Ducks Inc||||7|DUCK^DONALD^D|1|19241010|111 DUCK ST^^FOWL^CA^999990000|||||||||||||||||056269770||||||PT|M|111^DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|2||123121234|Cartoon Ducks Inc||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|3|SELF PAY|1|SELF PAY|||||||||||5||1"
msgtxt_3 = "MSH|^~\\&|AccMgr|1|||20050110045504||ADT^A08|599102|P|2.3|||\rEVN|A01|20050110045502|||||\rPID|1||10006579^^^1^MRN^1||DUCK^DONALD^D||19241010|M||1|111 DUCK ST^^FOWL^CA^999990000^^M|1|8885551212|8885551212|1|2||40007716^^^AccMgr^VN^1|123121234|||||||||||NO\rNK1|1|DUCK^HUEY|SO|3583 DUCK RD^^FOWL^CA^999990000|8885552222||Y||||||||||||||\rPV1|1|I|PREOP^101^1^1^^^S|3|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|||01||||1|||37^DISNEY^WALT^^^^^^AccMgr^^^^CI|2|40007716^^^AccMgr^VN|4|||||||||||||||||||1||G|||20050110045253||||||\rGT1|1|8291|DUCK^DONALD^D||111^DUCK ST^^FOWL^CA^999990000|8885551212||19241010|M||1|123121234||||#Cartoon Ducks Inc|111^DUCK ST^^FOWL^CA^999990000|8885551212||PT|\rDG1|1|I9|71596^OSTEOARTHROS NOS-L/LEG ^I9|OSTEOARTHROS NOS-L/LEG ||A|\rIN1|1|MEDICARE|3|MEDICARE|||||||Cartoon Ducks Inc|19891001|||4|DUCK^DONALD^D|1|19241010|111^DUCK ST^^FOWL^CA^999990000|||||||||||||||||123121234A||||||PT|M|111 DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|1||123121234|Cartoon Ducks Inc|||123121234A|||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|2|NON-PRIMARY|9|MEDICAL MUTUAL CALIF.|PO BOX 94776^^HOLLYWOOD^CA^441414776||8003621279|PUBSUMB|||Cartoon Ducks Inc||||7|DUCK^DONALD^D|1|19241010|111 DUCK ST^^FOWL^CA^999990000|||||||||||||||||056269770||||||PT|M|111^DUCK ST^^FOWL^CA^999990000|||||8291\rIN2|2||123121234|Cartoon Ducks Inc||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||8885551212\rIN1|3|SELF PAY|1|SELF PAY|||||||||||5||1"
## Break the first message into a flat list of strings on the HL7 field separator "|".
## NOTE(review): only "|" is split on, so the "\r" segment terminators stay embedded in the
## neighbouring entries (e.g. "2.4\rEVN"); split on "\r" first if per-segment fields are needed.
msgfields_1 = msgtxt_1.split(sep="|")
## Sanity checks: the raw message is a string and the split result is a list.
## (The original cell referenced an undefined name `msgfields`, which raises NameError on a
## fresh kernel; the variable defined above is `msgfields_1`.)
assert isinstance(msgtxt_1, str)
assert isinstance(msgfields_1, list)
## The list is turned into a Series further down so that it can be put into a data frame (DF).
## The number of entries determines the DF index; it was originally counted by hand as 105,
## but len(msgfields_1) gives the count directly.
msgfields_1
Out[81]:
In [86]:
## Alternative considered: the python-hl7 parser
## (http://python-hl7.readthedocs.io/en/latest/index.html#result-tree). The parser could not be
## made to work here, and its Message[segment][field][repetition][component][sub-component]
## addressing was not a solution the author liked.
## Series reference: http://pandas.pydata.org/pandas-docs/stable/dsintro.html
## No index is supplied, so pandas numbers the entries itself -- typing the positions out by
## hand (1, 2, 3, ... 105) turned out to be unnecessary.
s = pd.Series(msgfields_1)
s
## The field list is now a Series, which should allow it to be placed into a DF.
## Proof of concept only for now; sub-segments will be dealt with later if this approach works.
Out[86]:
In [90]:
## Reference: http://pandas.pydata.org/pandas-docs/stable/dsintro.html
## The documentation builds a frame from a dict of Series, e.g.
##   d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
##        'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}
##   df = pd.DataFrame(d)
## That pattern will be handy later for combining MULTIPLE messages (read: lists) into DFs.
## For now, wrap the single Series of fields in a one-column data frame.
df = pd.DataFrame(data=s)
df
Out[90]:
In [93]:
## Reference: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_json.html
##   DataFrame.to_json(path_or_buf=None, orient=None, date_format='epoch', double_precision=10,
##                     force_ascii=True, date_unit='ms', default_handler=None)
## Serialise the frame to a JSON string. With no path given the string is returned, so it is
## shown as the cell output. "columns" is the orient pandas uses for a DataFrame by default.
df.to_json(orient="columns")
Out[93]:
In [ ]: