let's open the csv file
In [1]:
%ls -lh ../data/csv
In [2]:
import pandas as pd
import os
In [3]:
parent_path = os.path.dirname(os.getcwd())
csv_input_dir_path = os.path.join(parent_path, 'data', 'csv')
csv_preprocess_dir_path = os.path.join(csv_input_dir_path, 'preprocess')
csv_output_dir_path = os.path.join(csv_input_dir_path, 'final')
file_name = '733bbfef'
csv_file_name = file_name + '.csv'
csv_output_file_name = file_name + '_preprocess.csv'
csv_file_path = os.path.join(csv_input_dir_path, csv_file_name)
csv_output_file_path = os.path.join(csv_preprocess_dir_path, csv_output_file_name)
In [4]:
df = pd.read_csv(csv_file_path, header=0)
print("%d rows" % df.shape[0])
df.head(3)
Out[4]:
In [5]:
df.tail(3)
Out[5]:
open one of the image files
In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image
img_base_path = os.path.join(parent_path, 'data', 'img')
img_dir_path = os.path.join(img_base_path, 'raw', file_name)
img_front_dir_path = os.path.join(img_base_path, 'front')
img_left_dir_path = os.path.join(img_base_path, 'side_left')
img_right_dir_path = os.path.join(img_base_path, 'side_right')
In [22]:
img_path = os.path.join(img_dir_path, df['img'][0])
img = Image.open(img_path)
print("image size is %s" % (img.size,))
fig = plt.figure(figsize = (10,10))
plt.imshow(img)
Out[22]:
As shown above, the screenshot from Euro Truck Simulator(ETS) has several information, such as the route advisor showing the map, two side mirrors, the car dashboard, and the front view.
Let's divide the screenshot image such that there are three "cameras", each of which is facing the front, left side, and right side.
In [23]:
# define coordinates
front_coord = (289,167, 851, 508)
left_coord = (13, 119, 171, 331)
right_coord = (846, 118, 1004, 330)
In [24]:
img_front = img.crop(front_coord)
fig, ax = plt.subplots(figsize=(10, 10))
ax.set(title='front view')
ax.imshow(img_front)
Out[24]:
In [25]:
img_left = img.crop(left_coord)
img_right = img.crop(right_coord)
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, figsize=(7,7))
ax0.set(title='left side mirror view')
ax0.imshow(img_left)
ax1.set(title='right side mirror view')
ax1.imshow(img_right)
Out[25]:
Save the image to directory, and add the path to the csv file.
In [7]:
def rename_file(f, s):
# 2017.jpg -> 2017_front.jpg
return f.split('.')[0] + s + '.' + f.split('.')[1]
In [27]:
img_front_path_list = []
img_left_path_list = []
img_right_path_list = []
for img_filename in df['img']:
img_path = os.path.join(img_dir_path, img_filename)
img = Image.open(img_path)
img_front = img.crop(front_coord)
img_left = img.crop(left_coord)
img_right = img.crop(right_coord)
img_front_path = rename_file(img_filename, '_front')
img_left_path = rename_file(img_filename, '_left')
img_right_path = rename_file(img_filename, '_right')
img_front_path_list.append(img_front_path)
img_left_path_list.append(img_left_path)
img_right_path_list.append(img_right_path)
img_front.save(os.path.join(img_front_dir_path, img_front_path))
img_left.save(os.path.join(img_left_dir_path, img_left_path))
img_right.save(os.path.join(img_right_dir_path, img_right_path))
In [10]:
df['front'] = img_front_path_list
df['side_left'] = img_left_path_list
df['side_right'] = img_right_path_list
Check to csv file, and overwrite to the original file.
In [12]:
df.head(3)
Out[12]:
In [13]:
df.to_csv(csv_output_file_path, index=False)
Check to see that it was saved well
In [14]:
df = pd.read_csv(csv_output_file_path, header=0)
print("%d rows" % df.shape[0])
df.head(3)
Out[14]:
move original csv file to process dir
In [15]:
os.rename(csv_file_path, os.path.join(csv_preprocess_dir_path, csv_file_name))
In [16]:
prev_final_file = os.path.join(csv_output_dir_path, 'v4.csv')
cur_final_file = os.path.join(csv_output_dir_path, 'v5.csv')
In [17]:
df_prev = pd.read_csv(prev_final_file, header=0)
print("%d rows" % df_prev.shape[0])
df_prev.head(3)
Out[17]:
In [18]:
df_final = pd.concat([df_prev, df], ignore_index=True)
In [19]:
df_final.shape
Out[19]:
In [20]:
df_final.head(3)
Out[20]:
In [21]:
df.tail(3)
Out[21]:
In [22]:
df_final.to_csv(cur_final_file, mode='w', index=False, header=True)
In [23]:
df_final = pd.read_csv(cur_final_file, header=0)
print("%d rows" % df_final.shape[0])
df_final.head(3)
Out[23]:
In [ ]:
If needed, split dataset into train and validation data set
In [24]:
from sklearn.model_selection import train_test_split
In [25]:
df_train, df_valid = train_test_split(df_final, test_size = 0.2)
In [26]:
df_train.to_csv(os.path.join(parent_path, 'data', 'csv', 'final', 'v5_train.csv'), index=False)
In [27]:
df_valid.to_csv(os.path.join(parent_path, 'data', 'csv', 'final', 'v5_valid.csv'), index=False)
In [ ]:
In [ ]:
In [ ]:
In [ ]: