程序代写代做代考 python final

final

In [1]:

import pandas as pd
from sklearn import linear_model

# data directory
directory = “accident/”

def readYear(year):
“””
读取指定年份的数据
“””

# read Accidents csv
acci5 = pd.read_csv(directory + “DfTRoadSafety_Accidents_” + year + “.csv”, encoding=”utf-8-sig”)

# read Casualties csv
casul5 = pd.read_csv(directory + “DfTRoadSafety_Casualties_” + year + “.csv”, encoding=”utf-8-sig”)

# read _Vehicles csv
vehicle5 = pd.read_csv(directory + “DfTRoadSafety_Vehicles_” + year + “.csv”, encoding=”utf-8-sig”)

# join the 3 csv tables to 1 using ‘Accident_Index’ as key
merged5 = pd.merge(acci5, vehicle5, on=’Accident_Index’, how=’inner’)
merged5 = pd.merge(merged5, casul5, on=’Accident_Index’, how=’inner’)

# return the combined table
return merged5

def readAll():
# read all data

t13 = readYear(“2013”)
t14 = readYear(“2014”)
t15 = readYear(“2015”)

# concat the 3 year tables
merged5 = pd.concat([t13, t14, t15])

return merged5

In [2]:

# read the data
merged5 = readAll()

# remove columns
X = merged5.drop(‘Casualty_Severity’, 1).drop(‘Accident_Index’, 1).drop(‘Vehicle_Reference_x’, 1).drop(‘Vehicle_Reference_y’,
1).drop(‘Casualty_Reference’, 1)

lc = [u’Date’,
u’Time’,
u’Local_Authority_(Highway)’,
u’LSOA_of_Accident_Location’]

for c in lc:
X = X.drop(c, 1)

# fill empty data with mean
# X is the feature matrix
X = X.fillna(X.mean())

# target class label
Y = merged5[‘Casualty_Severity’]

/Users/vagrant/anaconda42/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2881: DtypeWarning: Columns (31) have mixed types. Specify dtype option on import or set low_memory=False.
exec(code_obj, self.user_global_ns, self.user_ns)
/Users/vagrant/anaconda42/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2881: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.
exec(code_obj, self.user_global_ns, self.user_ns)

In [3]:

# show the X table
X

Out[3]:

1st_Point_of_Impact 1st_Road_Class 1st_Road_Number 2nd_Road_Class 2nd_Road_Number Accident_Severity Age_Band_of_Casualty Age_Band_of_Driver Age_of_Casualty Age_of_Driver … Special_Conditions_at_Site Speed_limit Towing_and_Articulation Urban_or_Rural_Area Vehicle_Leaving_Carriageway Vehicle_Location-Restricted_Lane Vehicle_Manoeuvre Vehicle_Type Was_Vehicle_Left_Hand_Drive? Weather_Conditions
0 1 3 3217 6 0 2 7 8 36.454136 36.324775 … 0 30 0 1 0 0 6 8 1 1
1 1 3 3217 6 0 2 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 1 1 1
2 1 3 3218 3 3218 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 3 1 8
3 1 3 3218 3 3218 3 4 6 36.454136 36.324775 … 0 30 0 1 0 0 18 3 1 8
4 0 4 450 4 412 3 10 8 36.454136 36.324775 … 0 30 0 1 0 0 14 11 1 1
5 4 4 450 5 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 4 3 1 1
6 0 4 450 5 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 1
7 2 3 3220 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 14 9 1 1
8 1 3 3220 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 1
9 1 3 3217 6 0 3 6 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 2
10 3 3 4 6 0 3 7 9 36.454136 36.324775 … 0 30 0 1 0 0 6 8 1 1
11 1 3 4 6 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 5 1 1
12 1 5 0 3 3218 3 9 5 36.454136 36.324775 … 0 30 0 1 0 0 7 1 1 1
13 0 3 3220 3 3220 3 4 -1 36.454136 36.324775 … 0 30 0 1 0 0 16 9 1 1
14 4 3 315 6 0 3 6 7 36.454136 36.324775 … 0 30 0 1 0 0 9 9 1 1
15 1 3 315 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 1 1 1
16 3 3 3220 -1 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 6 5 1 1
17 1 3 3220 -1 0 3 6 8 36.454136 36.324775 … 0 30 0 1 0 0 18 19 1 1
18 3 3 315 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 1 1 2
19 1 3 315 6 0 3 6 -1 36.454136 36.324775 … 0 30 0 1 0 0 9 9 1 2
20 3 6 0 6 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
21 3 6 0 6 0 3 -1 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
22 1 6 0 6 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 19 1 3
23 1 6 0 6 0 3 -1 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 19 1 3
24 2 6 0 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 4 3 1 1
25 1 6 0 6 0 3 6 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 1
26 3 3 3212 -1 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
27 1 3 3212 -1 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
28 1 3 3212 -1 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 2 9 1 3
29 1 3 3212 -1 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 2 9 1 3
… … … … … … … … … … … … … … … … … … … … … …
319897 1 2 74 -1 0 3 6 9 29.000000 57.000000 … 0 70 0 2 0 0 18 9 1 3
319898 1 2 74 -1 0 3 4 9 17.000000 57.000000 … 0 70 0 2 0 0 18 9 1 3
319899 2 2 74 -1 0 3 7 4 36.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319900 2 2 74 -1 0 3 6 4 30.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319901 2 2 74 -1 0 3 1 4 0.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319902 2 2 74 -1 0 3 5 4 25.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319903 2 2 74 -1 0 3 4 4 19.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319904 2 2 74 -1 0 3 6 4 29.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319905 2 2 74 -1 0 3 4 4 17.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319906 2 2 74 -1 0 3 7 -1 36.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319907 2 2 74 -1 0 3 6 -1 30.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319908 2 2 74 -1 0 3 1 -1 0.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319909 2 2 74 -1 0 3 5 -1 25.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319910 2 2 74 -1 0 3 4 -1 19.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319911 2 2 74 -1 0 3 6 -1 29.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319912 2 2 74 -1 0 3 4 -1 17.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319913 2 2 74 -1 0 3 7 6 36.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319914 2 2 74 -1 0 3 6 6 30.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319915 2 2 74 -1 0 3 1 6 0.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319916 2 2 74 -1 0 3 5 6 25.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319917 2 2 74 -1 0 3 4 6 19.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319918 2 2 74 -1 0 3 6 6 29.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319919 2 2 74 -1 0 3 4 6 17.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319920 1 2 74 -1 0 3 7 4 36.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319921 1 2 74 -1 0 3 6 4 30.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319922 1 2 74 -1 0 3 1 4 0.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319923 1 2 74 -1 0 3 5 4 25.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319924 1 2 74 -1 0 3 4 4 19.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319925 1 2 74 -1 0 3 6 4 29.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319926 1 2 74 -1 0 3 4 4 17.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3

1055110 rows × 58 columns

In [4]:

# show Y
Y

Out[4]:

0 2
1 2
2 3
3 3
4 3
5 3
6 3
7 3
8 3
9 3
10 3
11 3
12 3
13 3
14 3
15 3
16 3
17 3
18 3
19 3
20 3
21 3
22 3
23 3
24 3
25 3
26 3
27 3
28 3
29 3
..
319897 3
319898 3
319899 3
319900 3
319901 3
319902 3
319903 3
319904 3
319905 3
319906 3
319907 3
319908 3
319909 3
319910 3
319911 3
319912 3
319913 3
319914 3
319915 3
319916 3
319917 3
319918 3
319919 3
319920 3
319921 3
319922 3
319923 3
319924 3
319925 3
319926 3
Name: Casualty_Severity, dtype: int64

In [5]:

# the number of label 1, label 2, label 3
print [sum(Y == 1), sum(Y == 2), sum(Y == 3)]

# the ratio of label 3
print sum(Y == 3)*1.0 / len(Y)

[9095, 111143, 934872]
0.886042213608

In [6]:

# create Logistic Regression model
logreg = linear_model.LogisticRegression(C=1e5, class_weight = {1:1000, 2:10, 3:1})

# training the model
logreg.fit(X, Y)

# get the predict label
res = logreg.predict(X)
print([sum(res == 1), sum(res == 2), sum(res == 3)])

# compute the training accuracy
print(sum(res == Y)*1.0 / len(Y))

[1274, 120124, 933712]
0.939066068941

In [7]:

# absolute value of model coefficients
t = abs(logreg.coef_)
cosum = sum(t, 0)

# create coefficients series
model_coefficient = pd.Series(cosum, index=X.columns)

# output the prediction and correct label to file
outD = pd.DataFrame({“pred”: res, “correct”: Y})
outD.to_csv(“compare.csv”, index=False, header=True)

In [8]:

# show the top 10 features with highest coefficient value
model_coefficient.sort_values(ascending=False).head(30)

Out[8]:

Accident_Severity 7.743353
Sex_of_Casualty 0.457001
Did_Police_Officer_Attend_Scene_of_Accident 0.363090
Latitude 0.338320
Car_Passenger 0.317477
Urban_or_Rural_Area 0.144427
Age_Band_of_Casualty 0.137477
Pedestrian_Location 0.134923
Road_Type 0.121066
Bus_or_Coach_Passenger 0.117226
Road_Surface_Conditions 0.106327
Light_Conditions 0.104804
Carriageway_Hazards 0.103998
Skidding_and_Overturning 0.103755
Sex_of_Driver 0.103570
Casualty_Class 0.083644
1st_Road_Class 0.079734
Weather_Conditions 0.078550
Vehicle_Leaving_Carriageway 0.077897
Propulsion_Code 0.075065
Junction_Control 0.068812
Vehicle_Location-Restricted_Lane 0.065049
Hit_Object_off_Carriageway 0.064940
Hit_Object_in_Carriageway 0.064741
Pedestrian_Movement 0.055331
Number_of_Casualties 0.054157
Special_Conditions_at_Site 0.052575
Towing_and_Articulation 0.047844
Longitude 0.047351
Casualty_Home_Area_Type 0.046271
dtype: float64

In [9]:

# show the prediction and correct label
outD

Out[9]:

correct pred
0 2 2
1 2 2
2 3 3
3 3 3
4 3 3
5 3 3
6 3 3
7 3 3
8 3 3
9 3 3
10 3 3
11 3 3
12 3 3
13 3 3
14 3 3
15 3 3
16 3 3
17 3 3
18 3 3
19 3 3
20 3 3
21 3 3
22 3 3
23 3 3
24 3 3
25 3 3
26 3 3
27 3 3
28 3 3
29 3 3
… … …
319897 3 3
319898 3 3
319899 3 3
319900 3 3
319901 3 3
319902 3 3
319903 3 3
319904 3 3
319905 3 3
319906 3 3
319907 3 3
319908 3 3
319909 3 3
319910 3 3
319911 3 3
319912 3 3
319913 3 3
319914 3 3
319915 3 3
319916 3 3
319917 3 3
319918 3 3
319919 3 3
319920 3 3
319921 3 3
319922 3 3
319923 3 3
319924 3 3
319925 3 3
319926 3 3

1055110 rows × 2 columns

In [ ]: