basic dstruct usage


1. sift through a data set for relevant information:


In [1]:
from dstruct import DataStruct, DataField

In [2]:
# Sample nested record: "a" sits at the top level, "c" and "d" are nested under "b".
raw_data = dict(a=1, b=dict(c=2, d=3))

In [3]:
from dstruct import DataStruct, DataField

class A(DataStruct):
    """Flat view of a nested record, exposing three values as fields.

    Judging by the output of ``A(raw_data)`` shown in this notebook, ``a`` is
    read from the top-level key of the same name, while ``c`` and ``d`` are
    read from inside the nested ``"b"`` mapping.
    """
    
    a = DataField()
    # Positional arguments appear to name the keys to follow, outermost first
    # (i.e. raw_data['b']['c']).
    c = DataField('b', 'c')
    d = DataField('b', 'd')

In [4]:
# Build the struct from the nested dict; the cell output lists the extracted fields.
A(raw_data)


Out[4]:
{"a": 1, "c": 2, "d": 3}

2. import data sets from JSON files:


In [5]:
from dstruct import DataStructFromJSON, DataField

In [6]:
class AccountSummaryFromJSON(DataStructFromJSON):
    """Account summary whose fields are pulled from a JSON file.

    Instantiated with the path of the file to read, e.g.
    ``AccountSummaryFromJSON('data_files/bank_data.json')``.
    """
    
    user = DataField()
    type = DataField('account', 'account-type')
    # NOTE(review): "ballance" looks like a misspelling of "balance", but the key
    # 'account-ballance' presumably matches the JSON file, so it is left as is -- confirm.
    ballance = DataField('account', 'account-ballance')
    # A function passed under the keyword "parser" post-processes the raw value;
    # this one replaces every character except the last four with 'X'.
    account_number = DataField('account', 'account-number', parser=lambda s: 'X'*len(s[:-4])+s[-4:])

In [7]:
# The constructor takes the path of the JSON file to read.
AccountSummaryFromJSON('data_files/bank_data.json')


Out[7]:
{"ballance": 1234.56, "type": "checking", "user": "John F. Doe", "account_number": "XXXXX6789"}

3. import data sets from csv files:

  • The datafield decorator wraps complex parsers

In [8]:
from dstruct import DataStructFromCSV, datafield

In [9]:
def _column_mean(data, column):
    """Return the mean of ``column`` over every entry of ``data``, rounded to 1 decimal.

    ``data`` is iterated for its keys, and each ``data[key][column]`` is
    converted with ``int`` before averaging (presumably one entry per person --
    confirm against DataStructFromCSV).

    Raises ZeroDivisionError when ``data`` is empty, exactly as the inline
    loops this helper replaces did.
    """
    total = sum(int(data[name][column]) for name in data)
    # float() keeps this a true division under Python 2 as well.
    return round(float(total) / len(data), 1)


class AverageUserFromCSV(DataStructFromCSV):
    """Per-column averages computed over all rows of a CSV file."""

    # path=None: both parsers index the data by row name themselves, so they
    # presumably receive the whole data set rather than one value -- confirm.
    @datafield(path=None)
    def age(self, data):
        """Mean of the 'Age' column, rounded to one decimal place."""
        return _column_mean(data, 'Age')
    
    @datafield(path=None)
    def weight(self, data):
        """Mean of the 'Weight' column, rounded to one decimal place."""
        return _column_mean(data, 'Weight')

In [10]:
# Averages over all rows of the wide-form CSV file.
AverageUserFromCSV('data_files/wide.csv')


Out[10]:
{"age": 40.0, "weight": 174.3}

+ DataStructFromCSV understands both wide-form and narrow-form data representations


In [11]:
# Narrow form: one Person/Variable/Value row per measurement (table below).
narrow = AverageUserFromCSV('data_files/narrow.csv')
Person Variable Value
Bob Age 32
Bob Weight 178
Alice Age 24
Alice Weight 150
Steve Age 64
Steve Weight 195

In [12]:
# Wide form: one row per person, one column per variable (table below).
wide = AverageUserFromCSV('data_files/wide.csv')
Person Age Weight
Bob 32 178
Alice 24 150
Steve 64 195

In [13]:
# Structs built from the two layouts are expected to compare equal.
narrow == wide


Out[13]:
True

In [ ]: