homework(1)
Time Series Classification¶
In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
In [2]:
train_csv = os.listdir(‘./training/’)
test_csv = os.listdir(‘./test/’)
(c) Feature Extraction¶
what types of time-domain features are usually used in time series classification is min, max, mean, median, std, 1st quart, 3st quart, etc
In [72]:
train = []
for csv in train_csv:
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
if activity == ‘1’ or activity == ‘2’:
activity = ‘1’
if activity != ‘1’ and activity != ‘2’:
activity = ‘0’
train_path = “./training/” + csv
df = pd.read_csv(train_path, skiprows=4)
data = df.drop(columns=[‘# Columns: time’])
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
train.append(instance)
In [73]:
col = [‘min_1′,’min_2′,’min_3′,’min_4′,’min_5′,’min_6’,
‘max_1′,’max_2′,’max_3′,’max_4′,’max_5′,’max_6’,
‘mean_1′,’mean_2′,’mean_3′,’mean_4′,’mean_5′,’mean_6’,
‘median_1′,’median_2′,’median_3′,’median_4′,’median_5′,’median_6’,
‘std_1′,’std_2′,’std_3′,’std_4′,’std_5′,’std_6’,
‘1st_quart_1′,’1st_quart_2′,’1st_quart_3′,’1st_quart_4′,’1st_quart_5′,’1st_quart_6’,
‘3rd_quart_1′,’3rd_quart_2′,’3rd_quart_3′,’3rd_quart_4′,’3rd_quart_5′,’3rd_quart_6’,
‘activity’
]
train_pd = pd.DataFrame(train,columns=col)
In [74]:
train_pd
Out[74]:
min_1 min_2 min_3 min_4 min_5 min_6 max_1 max_2 max_3 max_4 … 1st_quart_4 1st_quart_5 1st_quart_6 3rd_quart_1 3rd_quart_2 3rd_quart_3 3rd_quart_4 3rd_quart_5 3rd_quart_6 activity
0 36.33 0.0 1.50 0.0 1.00 0.00 47.67 1.66 21.25 4.11 … 0.0000 11.2500 0.4700 46.3300 0.5000 18.3300 0.5000 14.5000 1.0000 0
1 26.25 0.0 7.00 0.0 2.00 0.00 44.25 8.64 26.50 8.06 … 1.5800 14.0000 1.6600 40.2500 3.3500 17.3300 3.7700 18.2500 4.0300 0
2 19.50 0.0 7.00 0.0 7.00 0.00 46.00 12.47 22.67 8.34 … 1.7900 14.0000 2.1600 37.8125 5.8000 17.0000 4.1100 17.7500 4.3350 0
3 23.50 0.0 0.00 0.0 0.00 0.00 30.00 1.79 13.25 5.02 … 0.4300 5.5000 0.4300 29.0000 0.5000 8.3300 1.2625 10.7500 1.0000 0
4 29.75 0.0 6.50 0.0 2.67 0.00 48.00 4.60 21.00 6.52 … 0.0000 11.6700 0.4700 47.7500 0.5000 18.2500 0.7100 15.5000 1.2200 0
5 19.00 0.0 0.00 0.0 1.67 0.00 45.50 6.40 32.75 11.42 … 0.0000 15.0000 0.4700 38.0000 0.5000 20.6900 1.1200 20.8125 1.3000 1
6 39.00 0.0 0.00 0.0 0.00 0.00 40.00 1.00 12.20 3.27 … 0.0000 9.0000 0.4300 39.5000 0.5000 8.7500 0.5000 12.3300 0.8300 0
7 18.50 0.0 5.00 0.0 4.00 0.00 44.25 12.60 27.00 9.46 … 1.5000 14.0000 1.7975 39.3300 4.3300 18.3300 3.7800 18.0625 4.0600 0
8 37.00 0.0 5.75 0.0 8.00 0.00 48.00 1.58 27.00 10.03 … 0.4300 22.2500 0.4300 45.0000 0.5000 18.2700 1.1200 24.0000 0.8700 1
9 36.00 0.0 6.33 0.0 2.00 0.00 45.80 2.12 24.00 5.59 … 0.4300 7.6275 0.5000 44.6175 0.5050 18.0000 1.0375 12.0000 1.5850 0
10 33.00 0.0 3.00 0.0 23.67 0.00 45.75 2.83 28.25 6.42 … 0.4300 28.4575 0.0000 42.7500 0.7100 22.0625 1.1200 31.2500 0.5000 1
11 23.50 0.0 6.67 0.0 5.50 0.00 46.25 14.82 24.25 9.90 … 2.0575 13.7500 2.1700 38.2500 5.9325 17.2500 4.1900 18.0000 4.5000 0
12 0.00 0.0 0.00 0.0 0.00 0.00 40.67 1.00 13.25 4.03 … 0.4300 11.3300 0.4300 39.6700 0.5000 8.7500 1.1200 13.0000 0.8700 0
13 29.25 0.0 0.00 0.0 2.00 0.00 46.00 4.72 24.00 6.12 … 0.0000 13.7300 0.4300 44.5000 0.5000 20.0000 1.0000 19.0000 1.2200 0
14 36.25 0.0 0.00 0.0 0.00 0.00 45.50 2.60 22.00 4.72 … 0.0000 14.0000 0.4300 44.3300 0.5000 13.3300 0.8300 16.6900 0.9500 0
15 27.00 0.0 5.67 0.0 8.67 0.00 45.00 10.47 25.00 10.61 … 1.7000 15.5000 1.5600 40.2500 3.8700 17.6700 3.8700 19.2700 3.7700 0
16 27.00 0.0 2.50 0.0 2.00 0.00 38.67 2.49 23.75 5.12 … 0.0000 16.5000 0.4300 37.0000 1.0000 21.0000 0.8700 20.7500 1.0000 0
17 41.75 0.0 0.00 0.0 5.67 0.00 46.50 1.50 21.67 4.64 … 0.0000 14.0000 0.4300 44.5000 0.5000 18.4250 0.7200 18.3300 1.0900 0
18 36.25 0.0 1.50 0.0 11.33 0.00 48.00 1.50 26.33 5.17 … 0.0000 20.5000 0.0000 44.6700 0.5000 18.0000 0.9400 23.7500 0.8300 1
19 20.75 0.0 5.00 0.0 6.00 0.00 46.25 12.68 23.75 9.20 … 2.0500 14.2500 2.0500 38.2500 5.7225 17.2500 4.1950 18.3300 4.3050 0
20 42.00 0.0 0.00 0.0 1.00 0.00 45.00 1.12 23.00 5.45 … 0.0000 13.2500 0.4300 43.5000 0.5000 20.0000 0.7100 19.0000 1.0900 0
21 40.00 0.0 2.00 0.0 2.50 0.00 44.67 1.00 23.50 5.93 … 0.0000 9.7500 0.4300 43.2500 0.5000 15.0000 0.8300 22.0000 0.8300 0
22 34.50 0.0 5.00 0.0 1.00 0.00 47.75 2.18 21.33 5.54 … 0.0000 12.0000 0.4700 45.2500 0.5000 19.3300 0.7100 14.8125 1.2200 0
23 36.00 0.0 0.00 0.0 0.00 0.00 47.33 2.17 21.00 5.56 … 0.0000 12.7500 0.4300 44.7500 0.5000 14.3300 0.8200 16.5000 0.9400 0
24 33.00 0.0 8.50 0.0 20.00 0.00 47.75 3.00 30.00 5.15 … 0.4300 30.4575 0.0000 45.0000 1.1200 24.3725 1.3000 36.3300 1.0000 1
25 19.25 0.0 6.00 0.0 4.67 0.43 44.00 13.86 22.75 9.10 … 2.0500 13.7300 2.1200 38.0000 5.9000 17.3300 4.0375 17.7500 4.3750 0
26 28.50 0.0 0.00 0.0 1.50 0.00 48.25 3.28 24.25 6.98 … 0.4300 10.5000 0.5000 46.5000 0.7100 18.7500 1.4800 19.2500 1.7000 0
27 23.33 0.0 7.00 0.0 7.67 0.00 43.50 9.71 28.50 9.78 … 1.7750 15.7500 2.1700 39.2500 3.7400 21.0000 4.2400 21.0000 4.6175 0
28 27.50 0.0 1.50 0.0 0.00 0.00 50.75 6.06 23.25 6.02 … 0.0000 14.0000 0.4300 42.0000 0.5000 18.0000 0.8700 20.7500 1.0900 0
29 35.50 0.0 0.00 0.0 2.50 0.00 48.00 4.50 21.00 5.12 … 0.0000 12.0000 0.4700 48.0000 0.5000 18.5000 0.8300 15.2500 1.1200 0
… … … … … … … … … … … … … … … … … … … … … …
39 35.00 0.0 6.50 0.0 29.00 0.00 47.40 1.70 29.75 4.44 … 0.0000 35.3625 0.0000 45.0000 0.5000 24.0000 0.8300 36.5000 0.9400 1
40 18.33 0.0 7.67 0.0 6.67 0.00 47.67 12.48 23.33 9.01 … 2.0500 13.7500 2.0500 38.0000 5.7450 17.5000 4.2450 18.0000 4.3200 0
41 42.50 0.0 3.67 0.0 4.33 0.00 46.00 1.12 27.25 5.17 … 0.4300 17.5000 0.4300 45.0000 0.4300 21.0000 0.8300 21.6900 1.0000 0
42 21.00 0.0 0.00 0.0 5.00 0.00 50.00 9.90 28.25 7.40 … 0.4300 17.6700 0.4700 34.5000 0.8225 18.0000 1.1200 23.5000 1.3000 1
43 18.33 0.0 5.50 0.0 6.50 0.00 45.75 15.37 24.00 9.18 … 2.0375 14.0000 2.1200 38.0000 5.7675 17.3725 4.1575 18.2500 4.2600 0
44 39.00 0.0 0.00 0.0 0.00 0.00 41.00 1.00 15.00 2.49 … 0.0000 1.6275 0.0000 39.7500 0.7100 12.0000 0.5000 9.3300 0.8300 0
45 39.00 0.0 0.00 0.0 0.00 0.00 56.25 8.49 18.25 5.72 … 0.4300 11.7500 0.4300 54.0000 0.4700 12.7500 1.0000 18.0000 1.0000 0
46 18.00 0.0 5.67 0.0 6.25 0.00 46.00 16.20 24.50 8.76 … 1.9125 14.2500 1.8850 38.7500 5.8700 17.5000 4.0900 18.5000 4.4400 0
47 21.50 0.0 6.50 0.0 7.00 0.00 51.25 13.55 24.00 9.50 … 2.0375 13.7500 2.1800 37.7500 5.8900 17.2500 4.4025 18.0000 4.5575 0
48 19.75 0.0 6.25 0.0 6.25 0.00 45.50 13.47 22.25 9.00 … 2.1700 13.5000 2.1575 38.0000 5.9700 17.0000 4.3900 17.7500 4.5650 0
49 19.50 0.0 7.33 0.0 6.33 0.00 45.33 14.67 23.25 9.00 … 2.0500 13.7300 2.0500 37.0000 6.1050 17.2700 4.2600 18.2500 4.3225 0
50 23.50 0.0 0.00 0.0 0.00 0.00 30.00 1.79 13.25 5.02 … 0.4300 5.5000 0.4300 29.0000 0.5000 8.3300 1.2625 10.7500 1.0000 0
51 39.00 0.0 0.00 0.0 0.00 0.00 41.00 1.00 15.00 2.49 … 0.0000 1.6275 0.0000 39.7500 0.7100 12.0000 0.5000 9.3300 0.8300 0
52 26.75 0.0 7.00 0.0 8.00 0.00 44.75 11.68 27.00 9.01 … 1.5600 15.0000 1.6400 39.7500 4.0225 19.0000 4.0675 18.6700 3.6325 0
53 24.25 0.0 5.50 0.0 7.00 0.00 45.00 8.58 26.75 8.05 … 1.6400 17.9500 1.5000 40.2500 3.1300 19.0000 3.9175 21.7500 3.9000 0
54 37.00 0.0 1.00 0.0 1.00 0.00 48.25 2.12 21.75 5.61 … 0.4300 12.6275 0.4700 44.5000 0.5000 17.5000 1.0200 17.5000 1.2200 0
55 30.00 0.0 3.00 0.0 1.00 0.00 46.67 2.95 21.25 7.50 … 0.0000 10.6275 0.4700 45.0000 0.5000 19.7500 0.5000 14.2500 1.1200 0
56 48.00 0.0 0.00 0.0 0.00 0.00 48.25 0.43 13.00 2.86 … 0.0000 4.6700 0.4600 48.0000 0.0000 6.2500 0.5000 10.0000 0.8300 0
57 36.00 0.0 3.00 0.0 1.00 0.00 47.33 4.50 21.00 5.54 … 0.0000 11.0000 0.5000 45.7500 0.5000 15.5000 0.8300 14.6700 1.5000 0
58 27.00 0.0 6.00 0.0 7.67 0.00 44.33 10.43 27.67 9.63 … 1.7975 15.0000 1.8850 39.8125 4.1500 18.0500 4.0300 19.5000 4.2625 0
59 25.33 0.0 3.00 0.0 6.75 0.00 45.00 10.84 27.25 10.57 … 1.7900 14.7500 1.7000 40.2500 3.7175 18.8125 4.1100 18.5000 4.0375 0
60 36.00 0.0 6.67 0.0 1.00 0.00 47.50 1.92 21.00 6.02 … 0.0000 11.3100 0.4700 45.0000 0.5000 18.7500 0.7100 15.5425 1.2200 0
61 27.75 0.0 0.00 0.0 0.00 0.00 44.67 10.76 24.75 9.00 … 1.5800 15.0000 1.5800 40.5000 3.3675 17.2500 3.9000 18.7500 3.7400 0
62 15.50 0.0 7.67 0.0 5.75 0.00 43.67 17.24 23.00 9.20 … 2.1200 14.3300 2.2400 37.2500 5.7650 17.5000 4.4400 18.2500 4.5375 0
63 35.25 0.0 0.00 0.0 0.00 0.00 48.50 3.28 23.50 5.12 … 0.0000 11.6700 0.4700 42.5000 1.0000 18.2500 1.2200 19.7500 1.3000 0
64 21.50 0.0 6.50 0.0 6.33 0.00 51.00 12.21 23.33 9.09 … 2.0575 14.2375 2.1200 38.0625 5.6250 17.6700 3.8400 18.2500 4.5000 0
65 25.50 0.0 1.00 0.0 5.50 0.00 45.75 12.19 24.00 5.12 … 0.0000 15.5000 0.4300 42.5000 0.8200 20.2700 0.9400 19.0000 1.1450 0
66 19.00 0.0 7.67 0.0 0.00 0.00 43.75 11.20 26.50 8.87 … 1.7225 14.7500 1.8900 39.5000 4.8450 18.6900 4.3225 19.6900 4.0600 0
67 39.67 0.0 0.00 0.0 0.00 0.00 44.75 1.00 22.75 5.68 … 0.4300 8.5000 0.4700 44.3300 0.5000 14.0000 1.2500 18.2500 1.0000 0
68 39.00 0.0 0.00 0.0 0.00 0.00 56.25 8.49 18.25 5.72 … 0.4300 11.7500 0.4300 54.0000 0.4700 12.7500 1.0000 18.0000 1.0000 0
69 rows × 43 columns
In [75]:
test = []
for csv in test_csv:
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
if activity == ‘1’ or activity == ‘2’:
activity = ‘1’
if activity != ‘1’ and activity != ‘2’:
activity = ‘0’
test_path = “./test/” + csv
df = pd.read_csv(test_path, skiprows=4)
data = df.drop(columns=[‘# Columns: time’])
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.extend(activity)
test.append(instance)
In [76]:
col = [‘min_1′,’min_2′,’min_3′,’min_4′,’min_5′,’min_6’,
‘max_1′,’max_2′,’max_3′,’max_4′,’max_5′,’max_6’,
‘mean_1′,’mean_2′,’mean_3′,’mean_4′,’mean_5′,’mean_6’,
‘median_1′,’median_2′,’median_3′,’median_4′,’median_5′,’median_6’,
‘std_1′,’std_2′,’std_3′,’std_4′,’std_5′,’std_6’,
‘1st_quart_1′,’1st_quart_2′,’1st_quart_3′,’1st_quart_4′,’1st_quart_5′,’1st_quart_6’,
‘3rd_quart_1′,’3rd_quart_2′,’3rd_quart_3′,’3rd_quart_4′,’3rd_quart_5′,’3rd_quart_6’,
‘activity’
]
test_pd = pd.DataFrame(test,columns=col)
In [77]:
test_pd
Out[77]:
min_1 min_2 min_3 min_4 min_5 min_6 max_1 max_2 max_3 max_4 … 1st_quart_4 1st_quart_5 1st_quart_6 3rd_quart_1 3rd_quart_2 3rd_quart_3 3rd_quart_4 3rd_quart_5 3rd_quart_6 activity
0 24.75 0.0 1.00 0.0 0.00 0.0 48.33 3.11 16.50 5.91 … 0.0000 2.0000 0.3225 48.0000 0.0000 7.50 0.7100 5.5425 0.940 0
1 33.25 0.0 3.00 0.0 1.00 0.0 48.00 4.44 22.50 5.36 … 0.0000 10.1875 0.4700 43.5000 0.5000 19.33 0.8700 17.6900 1.250 0
2 12.75 0.0 0.00 0.0 10.67 0.0 51.00 6.87 25.33 6.76 … 0.0000 20.5000 0.4300 26.5000 0.7100 22.00 0.8700 27.0000 0.870 1
3 24.25 0.0 5.50 0.0 7.00 0.0 45.00 8.58 26.75 8.05 … 1.6400 17.9500 1.5000 40.2500 3.1300 19.00 3.9175 21.7500 3.900 0
4 33.33 0.0 0.00 0.0 0.00 0.0 48.00 3.90 18.75 5.79 … 0.4300 9.3300 0.4700 46.5000 0.5000 14.50 1.2200 17.7500 1.250 0
5 37.25 0.0 4.00 0.0 27.25 0.0 45.00 1.30 29.50 7.23 … 0.0000 33.0000 0.0000 42.0000 0.5000 23.25 1.1200 36.0000 1.300 1
6 0.00 0.0 7.50 0.0 0.00 0.0 42.75 7.76 35.00 5.76 … 0.0000 15.0000 0.4700 30.0000 0.5000 22.50 0.9400 20.7500 1.300 1
7 15.00 0.0 5.00 0.0 7.00 0.0 46.75 13.44 25.25 8.58 … 1.8000 14.2500 2.1200 38.2500 5.6725 17.33 4.0900 18.5000 4.240 0
8 48.00 0.0 0.00 0.0 0.00 0.0 48.25 0.43 13.00 2.86 … 0.0000 4.6700 0.4600 48.0000 0.0000 6.25 0.5000 10.0000 0.830 0
9 37.00 0.0 0.00 0.0 2.00 0.0 50.75 4.87 24.00 8.50 … 0.0000 16.5000 0.4300 46.5000 0.5000 21.00 0.8700 21.0000 1.120 0
10 12.50 0.0 5.75 0.0 7.25 0.0 45.00 13.05 23.75 9.10 … 2.1675 14.6275 2.0600 36.7500 5.8900 17.50 4.4400 18.7500 4.440 0
11 19.33 0.0 5.50 0.0 6.75 0.0 43.50 14.50 23.50 8.86 … 2.0500 14.7500 2.1050 37.7500 5.3425 17.67 4.4400 18.6700 4.425 0
12 32.75 0.0 1.50 0.0 1.33 0.0 47.00 3.34 21.00 5.85 … 0.0000 13.0000 0.4300 45.3725 0.5000 17.00 0.7100 18.5650 1.090 0
13 35.50 0.0 1.00 0.0 2.00 0.0 46.25 2.12 20.67 6.56 … 0.0000 12.7500 0.4700 44.5000 0.5000 15.00 0.8700 16.5000 1.220 0
14 28.75 0.0 6.50 0.0 10.50 0.0 44.75 9.91 24.67 8.32 … 1.7900 18.0000 1.4100 40.2500 2.8700 19.50 4.0300 21.5000 3.770 0
15 38.00 0.0 2.00 0.0 27.67 0.0 45.67 1.22 29.50 5.76 … 0.0000 32.0000 0.0000 43.6700 0.5000 22.25 1.1450 34.5000 1.300 1
16 22.00 0.0 6.33 0.0 7.50 0.0 44.67 14.17 24.00 9.74 … 1.7900 16.0000 1.5000 40.0625 3.3500 19.00 4.0000 21.0000 4.150 0
17 44.50 0.0 2.00 0.0 4.75 0.0 46.75 1.00 23.75 6.36 … 0.4300 17.6275 0.4300 45.3300 0.4300 15.25 1.4800 21.0000 1.120 0
18 23.50 0.0 0.00 0.0 0.00 0.0 30.00 1.79 13.25 5.02 … 0.4300 5.5000 0.4300 29.0000 0.5000 8.33 1.2625 10.7500 1.000 0
19 rows × 43 columns
In [135]:
new_data = pd.concat([train_pd,test_pd])
In [136]:
new_data = new_data.reset_index(drop=True)
In [143]:
new_data
Out[143]:
min_1 min_2 min_3 min_4 min_5 min_6 max_1 max_2 max_3 max_4 … 1st_quart_4 1st_quart_5 1st_quart_6 3rd_quart_1 3rd_quart_2 3rd_quart_3 3rd_quart_4 3rd_quart_5 3rd_quart_6 activity
0 36.33 0.0 1.50 0.0 1.00 0.00 47.67 1.66 21.25 4.11 … 0.0000 11.2500 0.4700 46.3300 0.5000 18.3300 0.5000 14.5000 1.0000 0
1 26.25 0.0 7.00 0.0 2.00 0.00 44.25 8.64 26.50 8.06 … 1.5800 14.0000 1.6600 40.2500 3.3500 17.3300 3.7700 18.2500 4.0300 0
2 19.50 0.0 7.00 0.0 7.00 0.00 46.00 12.47 22.67 8.34 … 1.7900 14.0000 2.1600 37.8125 5.8000 17.0000 4.1100 17.7500 4.3350 0
3 23.50 0.0 0.00 0.0 0.00 0.00 30.00 1.79 13.25 5.02 … 0.4300 5.5000 0.4300 29.0000 0.5000 8.3300 1.2625 10.7500 1.0000 0
4 29.75 0.0 6.50 0.0 2.67 0.00 48.00 4.60 21.00 6.52 … 0.0000 11.6700 0.4700 47.7500 0.5000 18.2500 0.7100 15.5000 1.2200 0
5 19.00 0.0 0.00 0.0 1.67 0.00 45.50 6.40 32.75 11.42 … 0.0000 15.0000 0.4700 38.0000 0.5000 20.6900 1.1200 20.8125 1.3000 1
6 39.00 0.0 0.00 0.0 0.00 0.00 40.00 1.00 12.20 3.27 … 0.0000 9.0000 0.4300 39.5000 0.5000 8.7500 0.5000 12.3300 0.8300 0
7 18.50 0.0 5.00 0.0 4.00 0.00 44.25 12.60 27.00 9.46 … 1.5000 14.0000 1.7975 39.3300 4.3300 18.3300 3.7800 18.0625 4.0600 0
8 37.00 0.0 5.75 0.0 8.00 0.00 48.00 1.58 27.00 10.03 … 0.4300 22.2500 0.4300 45.0000 0.5000 18.2700 1.1200 24.0000 0.8700 1
9 36.00 0.0 6.33 0.0 2.00 0.00 45.80 2.12 24.00 5.59 … 0.4300 7.6275 0.5000 44.6175 0.5050 18.0000 1.0375 12.0000 1.5850 0
10 33.00 0.0 3.00 0.0 23.67 0.00 45.75 2.83 28.25 6.42 … 0.4300 28.4575 0.0000 42.7500 0.7100 22.0625 1.1200 31.2500 0.5000 1
11 23.50 0.0 6.67 0.0 5.50 0.00 46.25 14.82 24.25 9.90 … 2.0575 13.7500 2.1700 38.2500 5.9325 17.2500 4.1900 18.0000 4.5000 0
12 0.00 0.0 0.00 0.0 0.00 0.00 40.67 1.00 13.25 4.03 … 0.4300 11.3300 0.4300 39.6700 0.5000 8.7500 1.1200 13.0000 0.8700 0
13 29.25 0.0 0.00 0.0 2.00 0.00 46.00 4.72 24.00 6.12 … 0.0000 13.7300 0.4300 44.5000 0.5000 20.0000 1.0000 19.0000 1.2200 0
14 36.25 0.0 0.00 0.0 0.00 0.00 45.50 2.60 22.00 4.72 … 0.0000 14.0000 0.4300 44.3300 0.5000 13.3300 0.8300 16.6900 0.9500 0
15 27.00 0.0 5.67 0.0 8.67 0.00 45.00 10.47 25.00 10.61 … 1.7000 15.5000 1.5600 40.2500 3.8700 17.6700 3.8700 19.2700 3.7700 0
16 27.00 0.0 2.50 0.0 2.00 0.00 38.67 2.49 23.75 5.12 … 0.0000 16.5000 0.4300 37.0000 1.0000 21.0000 0.8700 20.7500 1.0000 0
17 41.75 0.0 0.00 0.0 5.67 0.00 46.50 1.50 21.67 4.64 … 0.0000 14.0000 0.4300 44.5000 0.5000 18.4250 0.7200 18.3300 1.0900 0
18 36.25 0.0 1.50 0.0 11.33 0.00 48.00 1.50 26.33 5.17 … 0.0000 20.5000 0.0000 44.6700 0.5000 18.0000 0.9400 23.7500 0.8300 1
19 20.75 0.0 5.00 0.0 6.00 0.00 46.25 12.68 23.75 9.20 … 2.0500 14.2500 2.0500 38.2500 5.7225 17.2500 4.1950 18.3300 4.3050 0
20 42.00 0.0 0.00 0.0 1.00 0.00 45.00 1.12 23.00 5.45 … 0.0000 13.2500 0.4300 43.5000 0.5000 20.0000 0.7100 19.0000 1.0900 0
21 40.00 0.0 2.00 0.0 2.50 0.00 44.67 1.00 23.50 5.93 … 0.0000 9.7500 0.4300 43.2500 0.5000 15.0000 0.8300 22.0000 0.8300 0
22 34.50 0.0 5.00 0.0 1.00 0.00 47.75 2.18 21.33 5.54 … 0.0000 12.0000 0.4700 45.2500 0.5000 19.3300 0.7100 14.8125 1.2200 0
23 36.00 0.0 0.00 0.0 0.00 0.00 47.33 2.17 21.00 5.56 … 0.0000 12.7500 0.4300 44.7500 0.5000 14.3300 0.8200 16.5000 0.9400 0
24 33.00 0.0 8.50 0.0 20.00 0.00 47.75 3.00 30.00 5.15 … 0.4300 30.4575 0.0000 45.0000 1.1200 24.3725 1.3000 36.3300 1.0000 1
25 19.25 0.0 6.00 0.0 4.67 0.43 44.00 13.86 22.75 9.10 … 2.0500 13.7300 2.1200 38.0000 5.9000 17.3300 4.0375 17.7500 4.3750 0
26 28.50 0.0 0.00 0.0 1.50 0.00 48.25 3.28 24.25 6.98 … 0.4300 10.5000 0.5000 46.5000 0.7100 18.7500 1.4800 19.2500 1.7000 0
27 23.33 0.0 7.00 0.0 7.67 0.00 43.50 9.71 28.50 9.78 … 1.7750 15.7500 2.1700 39.2500 3.7400 21.0000 4.2400 21.0000 4.6175 0
28 27.50 0.0 1.50 0.0 0.00 0.00 50.75 6.06 23.25 6.02 … 0.0000 14.0000 0.4300 42.0000 0.5000 18.0000 0.8700 20.7500 1.0900 0
29 35.50 0.0 0.00 0.0 2.50 0.00 48.00 4.50 21.00 5.12 … 0.0000 12.0000 0.4700 48.0000 0.5000 18.5000 0.8300 15.2500 1.1200 0
… … … … … … … … … … … … … … … … … … … … … …
58 27.00 0.0 6.00 0.0 7.67 0.00 44.33 10.43 27.67 9.63 … 1.7975 15.0000 1.8850 39.8125 4.1500 18.0500 4.0300 19.5000 4.2625 0
59 25.33 0.0 3.00 0.0 6.75 0.00 45.00 10.84 27.25 10.57 … 1.7900 14.7500 1.7000 40.2500 3.7175 18.8125 4.1100 18.5000 4.0375 0
60 36.00 0.0 6.67 0.0 1.00 0.00 47.50 1.92 21.00 6.02 … 0.0000 11.3100 0.4700 45.0000 0.5000 18.7500 0.7100 15.5425 1.2200 0
61 27.75 0.0 0.00 0.0 0.00 0.00 44.67 10.76 24.75 9.00 … 1.5800 15.0000 1.5800 40.5000 3.3675 17.2500 3.9000 18.7500 3.7400 0
62 15.50 0.0 7.67 0.0 5.75 0.00 43.67 17.24 23.00 9.20 … 2.1200 14.3300 2.2400 37.2500 5.7650 17.5000 4.4400 18.2500 4.5375 0
63 35.25 0.0 0.00 0.0 0.00 0.00 48.50 3.28 23.50 5.12 … 0.0000 11.6700 0.4700 42.5000 1.0000 18.2500 1.2200 19.7500 1.3000 0
64 21.50 0.0 6.50 0.0 6.33 0.00 51.00 12.21 23.33 9.09 … 2.0575 14.2375 2.1200 38.0625 5.6250 17.6700 3.8400 18.2500 4.5000 0
65 25.50 0.0 1.00 0.0 5.50 0.00 45.75 12.19 24.00 5.12 … 0.0000 15.5000 0.4300 42.5000 0.8200 20.2700 0.9400 19.0000 1.1450 0
66 19.00 0.0 7.67 0.0 0.00 0.00 43.75 11.20 26.50 8.87 … 1.7225 14.7500 1.8900 39.5000 4.8450 18.6900 4.3225 19.6900 4.0600 0
67 39.67 0.0 0.00 0.0 0.00 0.00 44.75 1.00 22.75 5.68 … 0.4300 8.5000 0.4700 44.3300 0.5000 14.0000 1.2500 18.2500 1.0000 0
68 39.00 0.0 0.00 0.0 0.00 0.00 56.25 8.49 18.25 5.72 … 0.4300 11.7500 0.4300 54.0000 0.4700 12.7500 1.0000 18.0000 1.0000 0
69 24.75 0.0 1.00 0.0 0.00 0.00 48.33 3.11 16.50 5.91 … 0.0000 2.0000 0.3225 48.0000 0.0000 7.5000 0.7100 5.5425 0.9400 0
70 33.25 0.0 3.00 0.0 1.00 0.00 48.00 4.44 22.50 5.36 … 0.0000 10.1875 0.4700 43.5000 0.5000 19.3300 0.8700 17.6900 1.2500 0
71 12.75 0.0 0.00 0.0 10.67 0.00 51.00 6.87 25.33 6.76 … 0.0000 20.5000 0.4300 26.5000 0.7100 22.0000 0.8700 27.0000 0.8700 1
72 24.25 0.0 5.50 0.0 7.00 0.00 45.00 8.58 26.75 8.05 … 1.6400 17.9500 1.5000 40.2500 3.1300 19.0000 3.9175 21.7500 3.9000 0
73 33.33 0.0 0.00 0.0 0.00 0.00 48.00 3.90 18.75 5.79 … 0.4300 9.3300 0.4700 46.5000 0.5000 14.5000 1.2200 17.7500 1.2500 0
74 37.25 0.0 4.00 0.0 27.25 0.00 45.00 1.30 29.50 7.23 … 0.0000 33.0000 0.0000 42.0000 0.5000 23.2500 1.1200 36.0000 1.3000 1
75 0.00 0.0 7.50 0.0 0.00 0.00 42.75 7.76 35.00 5.76 … 0.0000 15.0000 0.4700 30.0000 0.5000 22.5000 0.9400 20.7500 1.3000 1
76 15.00 0.0 5.00 0.0 7.00 0.00 46.75 13.44 25.25 8.58 … 1.8000 14.2500 2.1200 38.2500 5.6725 17.3300 4.0900 18.5000 4.2400 0
77 48.00 0.0 0.00 0.0 0.00 0.00 48.25 0.43 13.00 2.86 … 0.0000 4.6700 0.4600 48.0000 0.0000 6.2500 0.5000 10.0000 0.8300 0
78 37.00 0.0 0.00 0.0 2.00 0.00 50.75 4.87 24.00 8.50 … 0.0000 16.5000 0.4300 46.5000 0.5000 21.0000 0.8700 21.0000 1.1200 0
79 12.50 0.0 5.75 0.0 7.25 0.00 45.00 13.05 23.75 9.10 … 2.1675 14.6275 2.0600 36.7500 5.8900 17.5000 4.4400 18.7500 4.4400 0
80 19.33 0.0 5.50 0.0 6.75 0.00 43.50 14.50 23.50 8.86 … 2.0500 14.7500 2.1050 37.7500 5.3425 17.6700 4.4400 18.6700 4.4250 0
81 32.75 0.0 1.50 0.0 1.33 0.00 47.00 3.34 21.00 5.85 … 0.0000 13.0000 0.4300 45.3725 0.5000 17.0000 0.7100 18.5650 1.0900 0
82 35.50 0.0 1.00 0.0 2.00 0.00 46.25 2.12 20.67 6.56 … 0.0000 12.7500 0.4700 44.5000 0.5000 15.0000 0.8700 16.5000 1.2200 0
83 28.75 0.0 6.50 0.0 10.50 0.00 44.75 9.91 24.67 8.32 … 1.7900 18.0000 1.4100 40.2500 2.8700 19.5000 4.0300 21.5000 3.7700 0
84 38.00 0.0 2.00 0.0 27.67 0.00 45.67 1.22 29.50 5.76 … 0.0000 32.0000 0.0000 43.6700 0.5000 22.2500 1.1450 34.5000 1.3000 1
85 22.00 0.0 6.33 0.0 7.50 0.00 44.67 14.17 24.00 9.74 … 1.7900 16.0000 1.5000 40.0625 3.3500 19.0000 4.0000 21.0000 4.1500 0
86 44.50 0.0 2.00 0.0 4.75 0.00 46.75 1.00 23.75 6.36 … 0.4300 17.6275 0.4300 45.3300 0.4300 15.2500 1.4800 21.0000 1.1200 0
87 23.50 0.0 0.00 0.0 0.00 0.00 30.00 1.79 13.25 5.02 … 0.4300 5.5000 0.4300 29.0000 0.5000 8.3300 1.2625 10.7500 1.0000 0
88 rows × 43 columns
In [80]:
new_data.std()
Out[80]:
min_1 9.624011
min_2 0.000000
min_3 2.954516
min_4 0.000000
min_5 6.121205
min_6 0.046101
max_1 4.207745
max_2 5.059656
max_3 4.819848
max_4 2.181809
max_5 5.773355
max_6 2.533515
mean_1 5.276421
mean_2 1.577906
mean_3 3.976802
mean_4 1.168264
mean_5 5.704025
mean_6 1.157220
median_1 5.386624
median_2 1.413545
median_3 4.009687
median_4 1.148591
median_5 5.844368
median_6 1.089858
std_1 1.771289
std_2 0.885878
std_3 0.952003
std_4 0.458618
std_5 1.012345
std_6 0.516973
1st_quart_1 6.127846
1st_quart_2 0.948434
1st_quart_3 4.184148
1st_quart_4 0.844919
1st_quart_5 6.122142
1st_quart_6 0.761542
3rd_quart_1 5.031028
3rd_quart_2 2.131337
3rd_quart_3 4.153451
3rd_quart_4 1.555235
3rd_quart_5 5.563553
3rd_quart_6 1.526871
dtype: float64
I select min, median, and max as the three most important time-domain features
(d) Binary Classification Using Logistic Regression¶
i. Depict scatter plots of the features you specified in 1(c)iv extracted from time series 1, 2, and 6 of each instance, and use color to distinguish bending vs. other activities.¶
In [138]:
data_plot = new_data.copy()
In [145]:
data_plot = data_plot.reset_index(drop=False)
In [159]:
sns.lmplot(x=’index’,y=’min_1′,data=data_plot, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’min_2′,data=data_plot, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’min_6′,data=data_plot, hue=’activity’,fit_reg=False)
Out[159]:
In [160]:
sns.lmplot(x=’index’,y=’median_1′,data=data_plot, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’median_2′,data=data_plot, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’median_6′,data=data_plot, hue=’activity’,fit_reg=False)
Out[160]:
In [158]:
sns.lmplot(x=’index’,y=’max_1′,data=data_plot, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’max_2′,data=data_plot, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’max_6′,data=data_plot, hue=’activity’,fit_reg=False)
Out[158]:
ii. Break each time series in your training set into two (approximately) equal length time series.¶
In [199]:
train_2 = []
columns_name = []
for csv in train_csv:
num=1
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
if activity == ‘1’ or activity == ‘2’:
activity = ‘1’
if activity != ‘1’ and activity != ‘2’:
activity = ‘0’
train_path = “./training/” + csv
# print(train_path)
df = pd.read_csv(train_path, skiprows=4)
factor = pd.cut(df[‘# Columns: time’], 2)
for i in df.groupby(factor):
data_ = pd.DataFrame(list(i)[1]).drop(columns=[‘# Columns: time’])
data_.reset_index(drop=True,inplace=True)
if num == 1:
data = data_
else:
data = data.join(data_, lsuffix=’l’, rsuffix=’r’)
num = num + 1
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
train_2.append(instance)
for n in range(2*6):
name = ‘min_’ + str(n+1)
columns_name.append(name)
for n in range(2*6):
name = ‘max_’ + str(n+1)
columns_name.append(name)
for n in range(2*6):
name = ‘mean_’ + str(n+1)
columns_name.append(name)
for n in range(2*6):
name = ‘median_’ + str(n+1)
columns_name.append(name)
for n in range(2*6):
name = ‘std_’ + str(n+1)
columns_name.append(name)
for n in range(2*6):
name = ‘1st_quart_’ + str(n+1)
columns_name.append(name)
for n in range(2*6):
name = ‘3st_quart_’ + str(n+1)
columns_name.append(name)
columns_name.append(‘activity’)
In [200]:
train_2_pd = pd.DataFrame(train_2, columns=columns_name)
In [201]:
train_2_pd
Out[201]:
min_1 min_2 min_3 min_4 min_5 min_6 min_7 min_8 min_9 min_10 … 3st_quart_4 3st_quart_5 3st_quart_6 3st_quart_7 3st_quart_8 3st_quart_9 3st_quart_10 3st_quart_11 3st_quart_12 activity
0 45.00 0.0 15.00 0.00 6.67 0.00 36.33 0.00 1.50 0.00 … 0.5000 14.2500 0.9550 45.7500 0.5000 18.6700 0.5000 14.8125 1.0975 0
1 26.25 0.0 7.00 0.00 5.33 0.00 29.25 0.00 8.00 0.00 … 3.7700 18.2500 4.0375 40.2500 3.3775 17.7525 3.7700 18.0000 3.7700 0
2 19.50 0.0 7.00 0.00 7.00 0.00 20.25 0.00 7.67 0.00 … 4.2400 18.5000 4.1500 38.3300 5.5875 17.0000 3.9775 17.2500 4.5125 0
3 23.50 0.0 0.00 0.00 0.00 0.00 24.00 0.00 0.00 0.00 … 1.1325 11.5425 0.9400 29.5000 0.5000 7.0000 1.3000 10.0000 1.0900 0
4 41.25 0.0 6.50 0.00 2.67 0.00 29.75 0.00 7.50 0.00 … 0.7100 13.6700 1.3000 47.7500 0.5000 18.0000 0.7100 16.5000 1.0000 0
5 19.00 0.0 0.00 0.00 5.50 0.00 20.00 0.00 5.75 0.00 … 1.0000 20.0000 1.2200 42.0000 0.5000 22.0000 1.2200 25.0000 1.3000 1
6 39.00 0.0 0.00 0.00 7.75 0.00 39.00 0.00 0.00 0.00 … 0.5000 12.2500 0.7100 39.5000 0.5000 8.3300 0.5000 12.7500 0.8875 0
7 20.00 0.0 5.00 0.00 4.00 0.00 18.50 0.00 6.67 0.00 … 3.9000 18.0000 4.3500 39.3725 4.2750 18.5000 3.7700 18.2500 3.7400 0
8 39.00 0.0 6.25 0.00 10.00 0.00 37.00 0.00 5.75 0.00 … 1.4100 24.2500 1.1200 43.0000 0.5000 15.0000 0.8225 23.7500 0.7100 1
9 36.67 0.0 8.25 0.00 2.00 0.00 36.00 0.00 6.33 0.00 … 0.8700 11.0000 1.5800 44.7500 0.5000 18.0000 1.2200 13.7500 1.6400 0
10 33.00 0.0 3.00 0.00 23.67 0.00 39.25 0.00 12.50 0.00 … 1.6300 31.5000 0.7100 42.0000 0.5000 20.7500 0.8300 30.5625 0.5000 1
11 24.33 0.0 6.67 0.00 6.00 0.00 23.50 0.43 8.75 0.43 … 4.2175 18.0000 4.5850 38.2700 6.2400 17.2500 4.1500 18.0625 4.4525 0
12 39.33 0.0 0.00 0.00 7.50 0.00 0.00 0.00 0.00 0.00 … 1.0000 12.8125 0.8300 39.6700 0.5000 8.7500 1.3000 13.0000 0.9400 0
13 29.25 0.0 0.00 0.00 6.75 0.00 37.75 0.00 1.00 0.00 … 0.7100 21.0000 1.1200 44.5000 0.5000 16.7500 1.2200 18.4250 1.2200 0
14 38.33 0.0 4.00 0.00 9.33 0.00 36.25 0.00 0.00 0.00 … 0.7375 16.2500 1.0000 44.5000 0.5000 13.0000 0.8300 17.0000 0.8875 0
15 29.67 0.0 6.00 0.00 8.67 0.00 27.00 0.00 5.67 0.00 … 3.7700 19.0000 3.5625 40.5750 3.6700 18.5000 4.0225 20.0000 4.0825 0
16 27.00 0.0 4.00 0.00 3.00 0.00 27.00 0.00 2.50 0.00 … 0.5000 20.5000 1.0375 37.5000 1.2625 15.0000 1.1325 21.0000 1.0000 0
17 42.00 0.0 3.33 0.00 5.67 0.00 41.75 0.00 0.00 0.00 … 0.8200 18.2825 1.1200 42.0000 0.0000 20.2500 0.7100 18.3300 1.0900 0
18 36.67 0.0 4.00 0.00 11.33 0.00 36.25 0.00 1.50 0.00 … 0.9550 24.0000 0.8300 44.7500 0.5000 18.0000 0.8875 23.5000 0.8300 1
19 22.33 0.0 5.00 0.00 8.25 0.00 20.75 0.43 8.00 0.00 … 4.3450 18.2500 4.2175 37.7500 5.6725 17.2500 3.9600 18.5000 4.4400 0
20 42.00 0.0 5.67 0.00 4.00 0.00 42.00 0.00 0.00 0.00 … 0.8250 19.5000 1.1200 42.3300 0.4700 20.6700 0.5000 18.2500 1.0000 0
21 40.00 0.0 2.25 0.00 2.50 0.00 40.75 0.00 2.00 0.00 … 0.8200 12.5250 1.0000 43.5000 0.7200 16.7500 0.9400 23.5000 0.8300 0
22 34.50 0.0 7.00 0.00 1.00 0.00 37.50 0.00 5.00 0.00 … 0.8300 15.5000 1.2200 45.3300 0.5000 19.7500 0.5000 13.8125 1.1200 0
23 37.25 0.0 0.00 0.00 0.00 0.00 36.00 0.00 5.75 0.00 … 0.8700 15.3300 1.0000 44.7500 0.5000 14.5000 0.5000 17.7500 0.8700 0
24 33.75 0.0 11.00 0.00 30.75 0.00 33.00 0.00 8.50 0.00 … 1.2500 37.0000 1.2200 44.5000 1.2275 24.0000 1.4800 33.7500 0.8700 1
25 19.25 0.0 6.00 0.00 4.67 0.43 20.75 0.00 8.67 0.00 … 4.0675 18.0000 4.5075 38.0000 6.0250 17.2500 3.9600 17.5000 4.3050 0
26 36.25 0.0 0.00 0.00 4.00 0.00 28.50 0.00 3.33 0.00 … 1.6600 20.2500 1.6250 45.0000 0.8300 22.0000 1.2625 17.0000 1.7900 0
27 27.00 0.0 9.50 0.00 7.67 0.00 23.33 0.00 7.00 0.00 … 4.0900 20.8125 4.7025 39.2700 3.6850 21.0000 4.3900 21.0625 4.5000 0
28 29.00 0.0 1.50 0.00 8.50 0.00 27.50 0.00 4.20 0.00 … 1.1200 21.0000 0.9400 43.5000 0.4700 19.0625 0.7100 19.0625 1.3000 0
29 35.50 0.0 3.00 0.00 3.00 0.00 37.75 0.00 0.00 0.00 … 0.7100 16.0000 1.1200 48.0000 0.4300 14.0000 0.8300 15.0000 1.2200 0
… … … … … … … … … … … … … … … … … … … … … …
39 36.50 0.0 14.25 0.00 30.00 0.00 35.00 0.00 6.50 0.00 … 0.8300 37.0000 1.0000 45.0000 0.5000 24.0000 0.7100 36.2500 0.7100 1
40 18.33 0.0 8.33 0.00 9.50 0.43 20.33 0.00 7.67 0.00 … 4.2775 17.7500 4.2250 38.2500 5.8150 17.8500 4.1825 18.2500 4.4375 0
41 42.50 0.0 4.00 0.00 4.33 0.00 43.00 0.00 3.67 0.00 … 0.5000 20.0000 0.9400 45.0000 0.4300 21.2500 0.9400 22.6900 1.0900 0
42 21.00 0.0 2.75 0.00 5.00 0.00 27.00 0.00 0.00 0.00 … 1.0900 27.0000 2.0500 33.7500 1.2275 17.7500 1.1200 20.0000 0.9400 1
43 18.33 0.0 8.00 0.00 6.50 0.00 22.67 0.00 5.50 0.00 … 4.2600 18.2500 4.1200 37.5000 6.0050 17.5000 4.0425 18.2500 4.3450 0
44 39.33 0.0 9.00 0.00 0.00 0.00 39.00 0.00 0.00 0.00 … 0.4700 6.6700 0.8200 39.5000 0.5000 12.0000 0.7100 11.0625 0.9400 0
45 39.00 0.0 0.00 0.00 0.00 0.00 41.33 0.00 5.50 0.00 … 1.3000 12.5000 1.1450 54.0000 0.0000 13.3300 0.8300 19.0000 0.8300 0
46 18.00 0.0 8.67 0.00 6.25 0.00 23.00 0.00 5.67 0.00 … 3.9125 18.6700 4.2450 38.5000 5.8750 17.5000 4.1950 18.0625 4.6100 0
47 22.50 0.0 8.00 0.00 7.00 0.00 21.50 0.00 6.50 0.00 … 4.4925 18.0000 4.5000 38.2700 5.5050 17.2500 4.3375 18.0000 4.6400 0
48 20.33 0.0 6.25 0.43 6.25 0.47 19.75 0.00 7.00 0.00 … 4.5000 17.7500 4.7900 38.2500 5.4175 17.0000 4.0600 17.7500 4.3950 0
49 19.50 0.0 7.33 0.00 8.00 0.00 22.25 0.00 7.50 0.00 … 4.1950 18.2500 4.2450 37.0000 6.4500 17.2700 4.3200 18.0625 4.4525 0
50 23.50 0.0 0.00 0.00 0.00 0.00 24.00 0.00 0.00 0.00 … 1.1325 11.5425 0.9400 29.5000 0.5000 7.0000 1.3000 10.0000 1.0900 0
51 39.33 0.0 9.00 0.00 0.00 0.00 39.00 0.00 0.00 0.00 … 0.4700 6.6700 0.8200 39.5000 0.5000 12.0000 0.7100 11.0625 0.9400 0
52 26.75 0.0 8.25 0.00 8.00 0.00 27.33 0.00 7.00 0.00 … 4.0300 18.6900 3.8475 39.8125 3.9000 19.3725 4.1275 18.6700 3.4150 0
53 24.25 0.0 5.50 0.00 7.00 0.00 29.00 0.00 6.75 0.00 … 4.0600 21.7500 3.9375 40.2500 3.0075 18.2500 3.7700 21.6700 3.8325 0
54 38.25 0.0 1.00 0.00 2.67 0.00 37.00 0.00 2.00 0.00 … 0.7100 17.6700 1.0900 42.5000 0.8200 14.0000 1.3000 17.0625 1.3000 0
55 30.00 0.0 4.50 0.00 1.00 0.00 33.75 0.00 3.00 0.00 … 0.5000 14.5000 1.0900 45.0000 0.5000 19.0000 0.5000 13.6700 1.2200 0
56 48.00 0.0 1.00 0.00 0.00 0.00 48.00 0.00 0.00 0.00 … 0.5000 7.5000 0.9400 48.0000 0.0000 6.0000 0.4775 10.7500 0.8300 0
57 36.00 0.0 3.00 0.00 1.00 0.00 37.75 0.00 5.00 0.00 … 1.0000 14.2500 1.5000 46.5000 0.5000 16.0000 0.7325 15.2500 1.4800 0
58 27.67 0.0 7.00 0.00 7.67 0.00 27.00 0.00 6.00 0.00 … 4.3825 20.0000 4.2625 39.3725 4.0375 17.3300 3.6300 19.0625 4.2225 0
59 27.80 0.0 3.00 0.00 8.67 0.00 25.33 0.00 8.25 0.00 … 4.1200 18.0000 3.7700 40.0625 3.6475 19.7500 4.1100 18.8125 4.4250 0
60 36.00 0.0 9.00 0.00 1.00 0.00 36.00 0.00 6.67 0.00 … 0.5000 14.8125 1.1200 45.0000 0.7100 18.0000 0.8400 15.8125 1.3000 0
61 27.75 0.0 3.00 0.00 9.75 0.00 31.00 0.00 0.00 0.00 … 3.9400 18.7500 3.9000 40.5000 3.6400 17.0000 3.8700 18.7500 3.7050 0
62 23.00 0.0 7.67 0.43 5.75 0.00 15.50 0.00 9.25 0.00 … 4.3300 18.3725 4.2450 37.2500 6.0600 17.3300 4.4675 18.0000 4.7250 0
63 36.00 0.0 4.00 0.00 0.00 0.00 35.25 0.00 0.00 0.00 … 0.8300 16.2700 1.3275 38.5000 1.0000 17.5000 1.5000 21.0000 1.2625 0
64 21.50 0.0 9.00 0.00 6.33 0.00 21.75 0.00 6.50 0.00 … 3.9175 18.2500 4.5125 38.3125 5.3950 17.5425 3.7100 18.2500 4.3500 0
65 25.67 0.0 1.00 0.00 5.50 0.00 25.50 0.00 4.75 0.00 … 1.1200 18.2500 1.2200 36.8125 0.8700 20.5000 0.8300 19.7625 1.1200 0
66 19.00 0.0 7.67 0.00 0.00 0.00 19.25 0.00 8.50 0.00 … 4.1950 18.5000 4.3450 39.5000 4.5075 19.0000 4.5000 20.5000 3.7000 0
67 39.67 0.0 0.00 0.00 2.00 0.00 41.00 0.00 0.00 0.00 … 1.3000 15.0000 1.1200 44.5000 0.5000 14.2500 1.2200 19.3125 1.0000 0
68 39.00 0.0 0.00 0.00 0.00 0.00 41.33 0.00 5.50 0.00 … 1.3000 12.5000 1.1450 54.0000 0.0000 13.3300 0.8300 19.0000 0.8300 0
69 rows × 85 columns
In [204]:
data_plot_2 = train_2_pd.copy()
data_plot_2 = data_plot_2.reset_index(drop=False)
In [205]:
sns.lmplot(x=’index’,y=’min_1′,data=data_plot_2, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’min_2′,data=data_plot_2, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’min_6′,data=data_plot_2, hue=’activity’,fit_reg=False)
Out[205]:
In [207]:
sns.lmplot(x=’index’,y=’median_1′,data=data_plot_2, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’median_2′,data=data_plot_2, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’median_6′,data=data_plot_2, hue=’activity’,fit_reg=False)
Out[207]:
In [206]:
sns.lmplot(x=’index’,y=’max_1′,data=data_plot_2, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’max_2′,data=data_plot_2, hue=’activity’,fit_reg=False)
sns.lmplot(x=’index’,y=’max_6′,data=data_plot_2, hue=’activity’,fit_reg=False)
Out[206]:
Yes, I see considerable difference in the results with those of 1(d)i
iii. Break each time series in your training set into l 2 f1; 2; : : : ; 20g time series of approximately equal length and use logistic regression4 to solve the binary classification problem¶
In [3]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold,cross_val_score
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)
acc = []
for l in range(20):
l = l+1
train_n = []
columns_name = []
print(‘Break number:’,l)
for csv in train_csv:
num=1
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
if activity == ‘1’ or activity == ‘2’:
activity = ‘1’
if activity != ‘1’ and activity != ‘2’:
activity = ‘0’
train_path = “./training/” + csv
# print(train_path)
df = pd.read_csv(train_path, skiprows=4)
factor = pd.cut(df[‘# Columns: time’], l)
for i in df.groupby(factor):
data_ = pd.DataFrame(list(i)[1]).drop(columns=[‘# Columns: time’])
data_.reset_index(drop=True,inplace=True)
if num == 1:
data = data_.copy()
else:
data = data.join(data_, lsuffix=’l’, rsuffix=’r’)
num = num + 1
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
train_n.append(instance)
for n in range(l*6):
name = ‘min_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘max_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘mean_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘median_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘std_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘1st_quart_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘3st_quart_’ + str(n+1)
columns_name.append(name)
columns_name.append(‘activity’)
train_n = pd.DataFrame(train_n, columns=columns_name)
tr = train_n.drop(columns=[‘activity’])
label = train_n[‘activity’]
tr_RFE = RFE(estimator=LogisticRegression(), n_features_to_select=1).fit_transform(tr, label)
LR = LogisticRegression(C=100000000000000000000)
score = cross_val_score(LR, tr_RFE, label, cv=skf)
acc.append(np.mean(score))
Break number: 1
Break number: 2
Break number: 3
Break number: 4
Break number: 5
Break number: 6
Break number: 7
Break number: 8
Break number: 9
Break number: 10
Break number: 11
Break number: 12
Break number: 13
Break number: 14
Break number: 15
Break number: 16
Break number: 17
Break number: 18
Break number: 19
Break number: 20
In [4]:
acc
Out[4]:
[0.9560439560439562,
0.8692307692307691,
0.9714285714285715,
0.9714285714285715,
0.8703296703296702,
0.8703296703296702,
0.8989010989010989,
0.9131868131868132,
0.9428571428571428,
0.9857142857142858,
0.9571428571428571,
0.9571428571428573,
0.9714285714285715,
0.9571428571428573,
0.9571428571428573,
0.9571428571428571,
0.9571428571428571,
0.9714285714285715,
0.9571428571428573,
0.9714285714285715]
I find the best l is 10
In [50]:
acc = []
l=10
train_10 = []
columns_name = []
for csv in train_csv:
num=1
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
if activity == ‘1’ or activity == ‘2’:
activity = ‘1’
if activity != ‘1’ and activity != ‘2’:
activity = ‘0’
train_path = “./training/” + csv
# print(train_path)
df = pd.read_csv(train_path, skiprows=4)
factor = pd.cut(df[‘# Columns: time’], l)
for i in df.groupby(factor):
data_ = pd.DataFrame(list(i)[1]).drop(columns=[‘# Columns: time’])
data_.reset_index(drop=True,inplace=True)
if num == 1:
data = data_.copy()
else:
data = data.join(data_, lsuffix=’l’, rsuffix=’r’)
num = num + 1
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
train_10.append(instance)
for n in range(l*6):
name = ‘min_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘max_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘mean_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘median_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘std_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘1st_quart_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘3st_quart_’ + str(n+1)
columns_name.append(name)
columns_name.append(‘activity’)
In [51]:
train_10_pd = pd.DataFrame(train_10, columns=columns_name)
train_10 = pd.DataFrame(train_10, columns=columns_name)
tr_10 = train_10.drop(columns=[‘activity’])
label = train_10[‘activity’]
tr_10_RFE = RFE(estimator=LogisticRegression(), n_features_to_select=1).fit_transform(tr_10, label)
In [52]:
from sklearn.metrics import confusion_matrix,roc_curve
LR = LogisticRegression(C=100000000000000000000)
LR.fit( train_10, label,)
pred_LR = LR.predict(train_10)
print(“confusion matrix”)
print(confusion_matrix(label, pred_LR))
confusion matrix
[[60 0]
[ 0 9]]
In [53]:
from sklearn.metrics import roc_curve, auc
fpr, tpr, thresholds = roc_curve(label.astype(np.int), pred_LR.astype(np.int), pos_label=1)
In [54]:
auc(fpr, tpr)
Out[54]:
1.0
In [55]:
plt.plot(fpr, tpr)
Out[55]:
[
In [57]:
acc = []
l=10
test_10 = []
columns_name = []
for csv in test_csv:
num=1
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
if activity == ‘1’ or activity == ‘2’:
activity = ‘1’
if activity != ‘1’ and activity != ‘2’:
activity = ‘0’
test_path = “./test/” + csv
df = pd.read_csv(test_path, skiprows=4)
factor = pd.cut(df[‘# Columns: time’], l)
for i in df.groupby(factor):
data_ = pd.DataFrame(list(i)[1]).drop(columns=[‘# Columns: time’])
data_.reset_index(drop=True,inplace=True)
if num == 1:
data = data_.copy()
else:
data = data.join(data_, lsuffix=’l’, rsuffix=’r’)
num = num + 1
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
test_10.append(instance)
for n in range(l*6):
name = ‘min_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘max_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘mean_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘median_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘std_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘1st_quart_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘3st_quart_’ + str(n+1)
columns_name.append(name)
columns_name.append(‘activity’)
In [58]:
test_10_pd = pd.DataFrame(test_10, columns=columns_name)
test_10 = pd.DataFrame(test_10, columns=columns_name)
te_10 = test_10.drop(columns=[‘activity’])
label = test_10[‘activity’]
te_10_RFE = RFE(estimator=LogisticRegression(), n_features_to_select=1).fit_transform(te_10, label)
In [59]:
pred_LR = LR.predict(test_10)
print(“confusion matrix”)
print(confusion_matrix(label, pred_LR))
confusion matrix
[[15 0]
[ 0 4]]
the accuracy on the test set is 1
vi. Do your classes seem to be well-separated to cause instability in calculating logistic regression parameters¶
No
vii. From the confusion matrices you obtained, do you see imbalanced classes¶
No
(e) Binary Classification Using L1-penalized logistic regression¶
In [62]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold,cross_val_score
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)
acc = []
for l in range(20):
l = l+1
train_n = []
columns_name = []
print(‘Break number:’,l)
for csv in train_csv:
num=1
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
if activity == ‘1’ or activity == ‘2’:
activity = ‘1’
if activity != ‘1’ and activity != ‘2’:
activity = ‘0’
train_path = “./training/” + csv
# print(train_path)
df = pd.read_csv(train_path, skiprows=4)
factor = pd.cut(df[‘# Columns: time’], l)
for i in df.groupby(factor):
data_ = pd.DataFrame(list(i)[1]).drop(columns=[‘# Columns: time’])
data_.reset_index(drop=True,inplace=True)
if num == 1:
data = data_.copy()
else:
data = data.join(data_, lsuffix=’l’, rsuffix=’r’)
num = num + 1
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
train_n.append(instance)
for n in range(l*6):
name = ‘min_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘max_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘mean_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘median_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘std_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘1st_quart_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘3st_quart_’ + str(n+1)
columns_name.append(name)
columns_name.append(‘activity’)
train_n = pd.DataFrame(train_n, columns=columns_name)
tr = train_n.drop(columns=[‘activity’])
label = train_n[‘activity’]
tr_RFE = RFE(estimator=LogisticRegression(), n_features_to_select=1).fit_transform(tr, label)
LR = LogisticRegression(penalty=’l1′)
score = cross_val_score(LR, tr_RFE, label, cv=skf)
acc.append(np.mean(score))
Break number: 1
Break number: 2
Break number: 3
Break number: 4
Break number: 5
Break number: 6
Break number: 7
Break number: 8
Break number: 9
Break number: 10
Break number: 11
Break number: 12
Break number: 13
Break number: 14
Break number: 15
Break number: 16
Break number: 17
Break number: 18
Break number: 19
Break number: 20
In [63]:
acc
Out[63]:
[0.9417582417582417,
0.8703296703296702,
0.9571428571428571,
0.9571428571428571,
0.8703296703296702,
0.8703296703296702,
0.8989010989010989,
0.8989010989010989,
0.9428571428571428,
0.9428571428571428,
0.9571428571428571,
0.9714285714285715,
0.9714285714285715,
0.9571428571428571,
0.9571428571428571,
0.9428571428571428,
0.9571428571428571,
0.9571428571428571,
0.9428571428571428,
0.9428571428571428]
l = 12
(f) Multi-class Classification (The Realistic Case)¶
In [79]:
acc = []
l=12
train_12 = []
columns_name = []
for csv in train_csv:
num=1
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
train_path = “./training/” + csv
# print(train_path)
df = pd.read_csv(train_path, skiprows=4)
factor = pd.cut(df[‘# Columns: time’], l)
for i in df.groupby(factor):
data_ = pd.DataFrame(list(i)[1]).drop(columns=[‘# Columns: time’])
data_.reset_index(drop=True,inplace=True)
if num == 1:
data = data_.copy()
else:
data = data.join(data_, lsuffix=’l’, rsuffix=’r’)
num = num + 1
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
train_12.append(instance)
for n in range(l*6):
name = ‘min_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘max_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘mean_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘median_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘std_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘1st_quart_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘3st_quart_’ + str(n+1)
columns_name.append(name)
columns_name.append(‘activity’)
In [80]:
train_12_pd = pd.DataFrame(train_12, columns=columns_name)
train_12 = pd.DataFrame(train_12, columns=columns_name)
tr_12 = train_12.drop(columns=[‘activity’])
label_tr = train_12[‘activity’]
tr_12_RFE = RFE(estimator=LogisticRegression(), n_features_to_select=1).fit_transform(tr_12, label_tr)
In [81]:
acc = []
l=12
test_12 = []
columns_name = []
for csv in test_csv:
num=1
instance = []
activity = csv.split(‘_’)[1].split(‘.’)[0]
test_path = “./test/” + csv
df = pd.read_csv(test_path, skiprows=4)
factor = pd.cut(df[‘# Columns: time’], l)
for i in df.groupby(factor):
data_ = pd.DataFrame(list(i)[1]).drop(columns=[‘# Columns: time’])
data_.reset_index(drop=True,inplace=True)
if num == 1:
data = data_.copy()
else:
data = data.join(data_, lsuffix=’l’, rsuffix=’r’)
num = num + 1
instance.extend(data.min().values)
instance.extend(data.max().values)
instance.extend(data.mean().values)
instance.extend(data.median().values)
instance.extend(data.std().values)
instance.extend(data.quantile(0.25).values)
instance.extend(data.quantile(0.75).values)
instance.append(activity)
test_12.append(instance)
for n in range(l*6):
name = ‘min_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘max_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘mean_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘median_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘std_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘1st_quart_’ + str(n+1)
columns_name.append(name)
for n in range(l*6):
name = ‘3st_quart_’ + str(n+1)
columns_name.append(name)
columns_name.append(‘activity’)
In [82]:
test_12_pd = pd.DataFrame(test_12, columns=columns_name)
test_12 = pd.DataFrame(test_12, columns=columns_name)
te_12 = test_12.drop(columns=[‘activity’])
label_te = test_12[‘activity’]
te_12_RFE = RFE(estimator=LogisticRegression(), n_features_to_select=1).fit_transform(te_12, label_te)
In [85]:
from sklearn.metrics import accuracy_score
LR = LogisticRegression(penalty=’l1′)
LR.fit( tr_12_RFE , label_tr,)
pred_LR = LR.predict(te_12_RFE)
print(“confusion matrix”)
print(confusion_matrix(label_te, pred_LR))
print(“accuracy”)
print(accuracy_score(label_te, pred_LR))
confusion matrix
[[0 0 0 2 0 0 0]
[0 0 0 2 0 0 0]
[0 0 3 0 0 0 0]
[0 0 0 3 0 0 0]
[0 0 0 2 0 1 0]
[0 0 0 1 0 1 1]
[0 0 1 0 0 0 2]]
accuracy
0.47368421052631576
test error is 52.7%
In [90]:
from sklearn.naive_bayes import GaussianNB
NB = GaussianNB()
NB.fit( tr_12_RFE ,label_tr)
pred_NB = NB.predict(te_12_RFE)
print(“confusion matrix”)
print(confusion_matrix(label_te, pred_NB))
print(“accuracy”)
print(accuracy_score(label_te, pred_NB))
confusion matrix
[[0 0 0 2 0 0 0]
[0 0 0 2 0 0 0]
[0 0 1 0 0 0 2]
[0 0 0 3 0 0 0]
[0 0 0 3 0 0 0]
[0 0 0 2 0 1 0]
[0 0 0 0 0 0 3]]
accuracy
0.42105263157894735
test error is 57.9%
Multinomial regression model is better for multi-class classification