In [39]:

    
import numpy as np
import pandas as pd

from numpy import array
from scipy.signal import argrelextrema



In [40]:

    
# read in dataset
xl = pd.ExcelFile("data/130N_Cycles_1-47.xlsx")
df = xl.parse("Specimen_RawData_1")
df









    Out[40]:







  
    
      
      Time
      Load
    
  
  
    
      0
      0.0000
      0.06729
    
    
      1
      0.0018
      0.07128
    
    
      2
      0.1000
      0.05453
    
    
      3
      0.2000
      0.00621
    
    
      4
      0.3000
      0.00352
    
    
      5
      0.4000
      0.02063
    
    
      6
      0.5000
      -0.00168
    
    
      7
      0.6000
      -0.01183
    
    
      8
      0.7000
      -0.00167
    
    
      9
      0.8000
      -0.00656
    
    
      10
      0.9000
      -0.02108
    
    
      11
      1.0000
      -0.03623
    
    
      12
      1.1000
      -0.02316
    
    
      13
      1.2000
      -0.05095
    
    
      14
      1.3000
      -0.07705
    
    
      15
      1.4000
      -0.05499
    
    
      16
      1.5000
      -0.04504
    
    
      17
      1.6000
      -0.06794
    
    
      18
      1.7000
      -0.10153
    
    
      19
      1.8000
      -0.05887
    
    
      20
      1.9000
      -0.06541
    
    
      21
      2.0000
      -0.07291
    
    
      22
      2.1000
      -0.03818
    
    
      23
      2.2000
      -0.04398
    
    
      24
      2.3000
      0.00392
    
    
      25
      2.4000
      -0.02976
    
    
      26
      2.5000
      0.01042
    
    
      27
      2.6000
      0.02127
    
    
      28
      2.7000
      -0.01094
    
    
      29
      2.8000
      0.00993
    
    
      ...
      ...
      ...
    
    
      170071
      16768.0000
      -0.13978
    
    
      170072
      16768.1000
      -0.12339
    
    
      170073
      16768.1990
      -0.15841
    
    
      170074
      16768.2990
      -0.12322
    
    
      170075
      16768.4000
      -0.11189
    
    
      170076
      16768.5000
      -0.12804
    
    
      170077
      16768.6000
      -0.11056
    
    
      170078
      16768.6990
      -0.09291
    
    
      170079
      16768.7990
      -0.08599
    
    
      170080
      16768.9000
      -0.10398
    
    
      170081
      16769.0000
      -0.08131
    
    
      170082
      16769.1000
      -0.05598
    
    
      170083
      16769.1990
      -0.07377
    
    
      170084
      16769.2990
      -0.09031
    
    
      170085
      16769.4000
      -0.04457
    
    
      170086
      16769.5000
      -0.03088
    
    
      170087
      16769.6000
      -0.03074
    
    
      170088
      16769.6990
      -0.04886
    
    
      170089
      16769.7990
      -0.01057
    
    
      170090
      16769.9000
      -0.03553
    
    
      170091
      16770.0000
      -0.01936
    
    
      170092
      16770.1000
      0.00220
    
    
      170093
      16770.1990
      0.02822
    
    
      170094
      16770.2990
      0.02014
    
    
      170095
      16770.4000
      0.02088
    
    
      170096
      16770.5000
      0.05239
    
    
      170097
      16770.6000
      0.03399
    
    
      170098
      16770.6990
      0.05024
    
    
      170099
      16770.7990
      0.06006
    
    
      170100
      16770.9000
      0.09558
    
  

170101 rows × 2 columns

This is what the dataset currently looks like - it has 170,101 rows and two columns.

The dataset contains data from 47 cycles following an experiment. The output of these experiments form the two columns:

time (seconds)
load (exerted force, in Newtons)

My task is to predict the load for cycles 48, 49, and 50.

I will:
derive the time for each cycle
heating time for each cycle
cooling time for each cycle
number of cycles (I can do this my finding the local maxima and minima throughout the data)



In [41]:

    
# append data from time column to list
time = []
for item in df.index:
    time.append(df["Time"][item])

# append data from load column to list
load = []
for item in df.index:
    load.append(df["Load"][item])



In [42]:

    
# convert time array to np array for further processing
np_time = array(time)



In [43]:

    
# for local maxima
max = argrelextrema(np_time, np.greater)
print("local maxima array for time is:", max, "\n")

# for local minima
min = argrelextrema(np_time, np.less)
print("local minima array for time is:", min)









    



local maxima array for time is: (array([], dtype=int64),) 

local minima array for time is: (array([], dtype=int64),)

The arrays above actually look empty...

After further research into Python's numpy library, I realized that argrelextrema with np.greater or np.less does NOT consider repeated values to be relative maxima (https://github.com/scipy/scipy/issues/3749).

A strict inequality is required to satisfy both sides of the point.



In [44]:

    
# for local maxima
max_ = argrelextrema(np_time, np.greater_equal)
print("local maxima array for time is:", max_, "\n")

# for local minima
min_ = argrelextrema(np_time, np.less_equal)
print("local minima array for time is:", min_)









    



local maxima array for time is: (array([ 31934,  47151,  55544,  58516,  61698,  84166,  87735,  91038,
       104427, 120421, 129657, 133953, 155582, 164994, 170100]),) 

local minima array for time is: (array([     0,  31935,  47152,  55545,  58517,  61699,  84167,  87736,
        91039, 104428, 120422, 129658, 133954, 155583, 164995]),)

I applied the _equal parameter to my argrelextrema function and notice that no duplicate values have occurred, which is a good sign so far.



In [45]:

    
print("The length of the max array for time is:",np.size(max_), "\n")
print("The length of the min array for time is:",np.size(min_), "\n")









    



The length of the max array for time is: 15 

The length of the min array for time is: 15

However, it's odd that the numbers returned from each array is only 15...considering there are 47 cycles present in the dataset. So, I will try another method instead (https://docs.scipy.org/doc/numpy/reference/generated/numpy.r_.html).



In [46]:

    
row_wise_merging = np.r_[True, np_time[1:] < np_time[:-1]] & np.r_[np_time[:-1] < np_time[1:], True]



In [47]:

    
# print side of row_wise_merging
size = np.size(row_wise_merging)
print(size)
print(row_wise_merging)









    



170101
[ True False False ..., False False False]



In [48]:

    
count = 0
for i in np.nditer(row_wise_merging):
    if i == True:
        count +=1 
print(count)

The output of numpy's row wise merging method does not prove to be helpful as well.

Since I am not familiar with how to compute local maxima and minima manually on my own, I will need to do further research on that.

As of now, I am planning to proceed with other calculations based on the array returned from argrelextrema. When I retrieve the right array for time, I believe I can modify my presented algorithm below.



In [49]:

    
# places indices returned from local maxima into a list
local_max_indices = []
for idx in np.nditer(max_):
    local_max_indices.append(idx)   
print(local_max_indices)









    



[array(31934), array(47151), array(55544), array(58516), array(61698), array(84166), array(87735), array(91038), array(104427), array(120421), array(129657), array(133953), array(155582), array(164994), array(170100)]



In [50]:

    
# create a list of sums of time and load up until
# index in local_max_indices list
concat_data = []
for idx, (t, l) in enumerate(zip(time, load)):
   # print(idx, t, l)
    for item in local_max_indices:
        if idx == item:
            concat_data.append((sum(time[:idx]),sum(load[:idx])))

for item in range(len(concat_data)):
    print("Cycle", item)
    print("Time:", concat_data[item][0])
    print("Load:", concat_data[item][1])
    print("\n")









    



Cycle 0
Time: 50090330.3853
Load: 886827.10324


Cycle 1
Time: 109224942.15
Load: 1336267.88375


Cycle 2
Time: 151578795.284
Load: 1544432.4858


Cycle 3
Time: 168237161.8
Load: 1634193.77493


Cycle 4
Time: 187035884.677
Load: 1731646.36961


Cycle 5
Time: 348122651.618
Load: 2427467.38261


Cycle 6
Time: 378284414.766
Load: 2573697.19744


Cycle 7
Time: 407315820.051
Load: 2683299.27422


Cycle 8
Time: 536007570.556
Load: 3108964.80223


Cycle 9
Time: 712921430.822
Load: 3680416.99285


Cycle 10
Time: 826598329.026
Load: 4024847.81762


Cycle 11
Time: 882348975.542
Load: 4191389.98264


Cycle 12
Time: 1190793342.17
Load: 5124183.32977


Cycle 13
Time: 1339483600.32
Load: 5513159.5854


Cycle 14
Time: 1423820416.46
Load: 5761623.5985

As mentioned before, the results above are unrealistic since we know that there are 47 cycles (rather than the 15 that were outputted) that exist. Once I have the correct values returned from the local maxima function, I can proceed with modifying my code for that array.

My next step: to implement an algorithm that would make the actual predictions.



In [ ]:



In [ ]:

	Time	Load
0	0.0000	0.06729
1	0.0018	0.07128
2	0.1000	0.05453
3	0.2000	0.00621
4	0.3000	0.00352
5	0.4000	0.02063
6	0.5000	-0.00168
7	0.6000	-0.01183
8	0.7000	-0.00167
9	0.8000	-0.00656
10	0.9000	-0.02108
11	1.0000	-0.03623
12	1.1000	-0.02316
13	1.2000	-0.05095
14	1.3000	-0.07705
15	1.4000	-0.05499
16	1.5000	-0.04504
17	1.6000	-0.06794
18	1.7000	-0.10153
19	1.8000	-0.05887
20	1.9000	-0.06541
21	2.0000	-0.07291
22	2.1000	-0.03818
23	2.2000	-0.04398
24	2.3000	0.00392
25	2.4000	-0.02976
26	2.5000	0.01042
27	2.6000	0.02127
28	2.7000	-0.01094
29	2.8000	0.00993
...	...	...
170071	16768.0000	-0.13978
170072	16768.1000	-0.12339
170073	16768.1990	-0.15841
170074	16768.2990	-0.12322
170075	16768.4000	-0.11189
170076	16768.5000	-0.12804
170077	16768.6000	-0.11056
170078	16768.6990	-0.09291
170079	16768.7990	-0.08599
170080	16768.9000	-0.10398
170081	16769.0000	-0.08131
170082	16769.1000	-0.05598
170083	16769.1990	-0.07377
170084	16769.2990	-0.09031
170085	16769.4000	-0.04457
170086	16769.5000	-0.03088
170087	16769.6000	-0.03074
170088	16769.6990	-0.04886
170089	16769.7990	-0.01057
170090	16769.9000	-0.03553
170091	16770.0000	-0.01936
170092	16770.1000	0.00220
170093	16770.1990	0.02822
170094	16770.2990	0.02014
170095	16770.4000	0.02088
170096	16770.5000	0.05239
170097	16770.6000	0.03399
170098	16770.6990	0.05024
170099	16770.7990	0.06006
170100	16770.9000	0.09558