Title: Streaming Data Pipeline
Slug: streaming_data_pipeline
Summary: Streaming Data Pipeline Using Python.
Date: 2017-02-02 12:00
Category: Python
Tags: Basics
Authors: Chris Albon
In [1]:
# Input for the pipeline: the integers 1 through 10, held in a list
raw_data = list(range(1, 11))
In [2]:
# Define a generator that yields input+6
def add_6(numbers):
    """Lazily yield each input value plus 6.

    Args:
        numbers: Any iterable whose items support ``+ 6``.

    Yields:
        ``x + 6`` for every ``x`` in ``numbers``, in input order. Items are
        pulled from ``numbers`` one at a time, only when the caller asks
        for the next value.
    """
    for x in numbers:
        yield x + 6
# Define a generator that yields input-2
def subtract_2(numbers):
    """Lazily yield each input value minus 2.

    Args:
        numbers: Any iterable whose items support ``- 2``.

    Yields:
        ``x - 2`` for every ``x`` in ``numbers``, in input order. Items are
        pulled from ``numbers`` one at a time, only when the caller asks
        for the next value.
    """
    for x in numbers:
        yield x - 2
# Define a generator that yields input*100
def multiply_by_100(numbers):
    """Lazily yield each input value multiplied by 100.

    Args:
        numbers: Any iterable whose items support ``* 100``.

    Yields:
        ``x * 100`` for every ``x`` in ``numbers``, in input order. Items
        are pulled from ``numbers`` one at a time, only when the caller
        asks for the next value.
    """
    for x in numbers:
        yield x * 100
In [3]:
# Step 1 of the pipeline: wrap the raw data in the add_6 generator.
# Calling a generator function only creates the generator; no value is computed yet.
step1 = add_6(raw_data)
# Step 2 of the pipeline: feed the output of step 1 into subtract_2
step2 = subtract_2(step1)
# Step 3 of the pipeline: feed the output of step 2 into multiply_by_100.
# Each value is pulled through all three steps one at a time, on demand.
pipeline = multiply_by_100(step2)
In [4]:
# Pull the first processed value: raw value 1 -> ((1 + 6) - 2) * 100 = 500
next(pipeline)
Out[4]:
500
In [5]:
# Pull the second processed value: raw value 2 -> ((2 + 6) - 2) * 100 = 600
next(pipeline)
Out[5]:
600
In [6]:
# Process all remaining data: values already taken with next() are not
# produced again. The loop variable is deliberately not named raw_data,
# so the input list is not overwritten by the last result.
for result in pipeline:
    print(result)