In [44]:
import pandas as pd
pd.__version__


Out[44]:
'1.0.3'

In [45]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [46]:
# Storing the data in a pandas data frame.

df = pd.read_csv("data/PewDiePie.csv")
df.head(10)


Out[46]:
Date Subscribers
0 1 71915
1 2 48270
2 3 47746
3 4 42276
4 5 36867
5 6 28722
6 7 29794
7 8 33125
8 9 27877
9 10 30675

In [47]:
x = df.iloc[:, 0]
x_df = pd.DataFrame(x)
y = df.iloc[:, 1]
print(x_df)
print(y)


    Date
0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
0     71915
1     48270
2     47746
3     42276
4     36867
5     28722
6     29794
7     33125
8     27877
9     30675
10    32450
11    45714
12    59377
13    42289
14    36002
15    25872
16    31623
17    46504
18    49421
19    27844
Name: Subscribers, dtype: int64

In [48]:
x_train, x_test, y_train, y_test = train_test_split(x_df, y, test_size = 0.3, random_state = 0)
print(x_test)
print(y_test)
print(x_train)
print(y_train)
df.head(10)


    Date
18    19
1      2
19    20
8      9
10    11
17    18
18    49421
1     48270
19    27844
8     27877
10    32450
17    46504
Name: Subscribers, dtype: int64
    Date
6      7
13    14
4      5
2      3
5      6
14    15
9     10
7      8
16    17
11    12
3      4
0      1
15    16
12    13
6     29794
13    42289
4     36867
2     47746
5     28722
14    36002
9     30675
7     33125
16    31623
11    45714
3     42276
0     71915
15    25872
12    59377
Name: Subscribers, dtype: int64
Out[48]:
Date Subscribers
0 1 71915
1 2 48270
2 3 47746
3 4 42276
4 5 36867
5 6 28722
6 7 29794
7 8 33125
8 9 27877
9 10 30675

In [49]:
# training a simple linear regression model
simpleLinearRegression = LinearRegression()
simpleLinearRegression.fit(x_train, y_train)


Out[49]:
LinearRegression()

In [50]:
# test trained model
y_predict = simpleLinearRegression.predict(x_test)

predict = pd.DataFrame({'X Test': x_test, 'Y Predict': y_predict})
predict.head(10)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_arrays(arrays, names, axes)
   1670         blocks = form_blocks(arrays, names, axes)
-> 1671         mgr = BlockManager(blocks, axes)
   1672         mgr._consolidate_inplace()

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\managers.py in __init__(self, blocks, axes, do_integrity_check)
    138         if do_integrity_check:
--> 139             self._verify_integrity()
    140 

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\managers.py in _verify_integrity(self)
    333             if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
--> 334                 construction_error(tot_items, block.shape[1:], self.axes)
    335         if len(self.items) != tot_items:

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e)
   1693         raise ValueError("Empty data passed with indices specified.")
-> 1694     raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
   1695 

ValueError: Shape of passed values is (1, 3), indices imply (6, 3)

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-50-71c4249f1be0> in <module>
      2 y_predict = simpleLinearRegression.predict(x_test)
      3 
----> 4 predict = pd.DataFrame({'X Test': x_test, 'Y Predict': y_predict, 'Y real': y_test})
      5 predict.head(10)
      6 

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
    433             )
    434         elif isinstance(data, dict):
--> 435             mgr = init_dict(data, index, columns, dtype=dtype)
    436         elif isinstance(data, ma.MaskedArray):
    437             import numpy.ma.mrecords as mrecords

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\construction.py in init_dict(data, index, columns, dtype)
    252             arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
    253         ]
--> 254     return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
    255 
    256 

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\construction.py in arrays_to_mgr(arrays, arr_names, index, columns, dtype)
     72     axes = [ensure_index(columns), index]
     73 
---> 74     return create_block_manager_from_arrays(arrays, arr_names, axes)
     75 
     76 

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\managers.py in create_block_manager_from_arrays(arrays, names, axes)
   1673         return mgr
   1674     except ValueError as e:
-> 1675         construction_error(len(arrays), arrays[0].shape, axes, e)
   1676 
   1677 

D:\CourseMSSR\git\course-social-robotics\machine_learning_jupyter\venv\lib\site-packages\pandas\core\internals\managers.py in construction_error(tot_items, block_shape, axes, e)
   1692     if block_shape[0] == 0:
   1693         raise ValueError("Empty data passed with indices specified.")
-> 1694     raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
   1695 
   1696 

ValueError: Shape of passed values is (1, 3), indices imply (6, 3)