程序代写代做代考 traffic_accident

traffic_accident

In [47]:

import pandas as pd
from sklearn import linear_model

# Folder (relative to the working directory) holding the yearly DfT CSV extracts.
directory = "accident/"


def readYear(year):
    """Load one year's accident, vehicle and casualty tables and join them.

    Args:
        year: Year of the extract, e.g. ``"2013"``. Anything ``str()`` can
            render is accepted, so ``2013`` works as well.

    Returns:
        pandas.DataFrame: accidents inner-joined with vehicles and then with
        casualties, both joins keyed on ``Accident_Index`` only. Columns that
        appear in more than one table get pandas' ``_x`` / ``_y`` suffixes.

    NOTE(review): because the casualty join uses ``Accident_Index`` alone,
    every casualty of an accident is paired with every vehicle of that
    accident. If casualties should only be attached to their own vehicle the
    join key needs to change -- confirm the intended granularity.
    """
    year = str(year)

    def _load(table):
        # The files are read as "utf-8-sig" so a leading BOM does not end up
        # in the first column name.
        path = directory + "DfTRoadSafety_" + table + "_" + year + ".csv"
        return pd.read_csv(path, encoding="utf-8-sig")

    acci5 = _load("Accidents")
    casul5 = _load("Casualties")
    vehicle5 = _load("Vehicles")

    merged5 = pd.merge(acci5, vehicle5, on='Accident_Index', how='inner')
    merged5 = pd.merge(merged5, casul5, on='Accident_Index', how='inner')

    return merged5

def readAll(years=("2013", "2014", "2015")):
    """Load several yearly extracts and stack them into a single frame.

    Args:
        years: Iterable of years to load; each entry is handed to
            ``readYear``. Defaults to 2013, 2014 and 2015, the set that was
            previously hard-coded.

    Returns:
        pandas.DataFrame: the per-year frames concatenated row-wise in the
        order given. Each year keeps its own row labels, so index labels
        repeat across years; use positional access or ``reset_index`` if
        unique labels are needed.
    """
    yearly_frames = [readYear(year) for year in years]

    merged5 = pd.concat(yearly_frames)

    return merged5

In [52]:

merged5 = readAll()

# Non-numeric columns (dates, times, area codes) that are left out of the
# feature matrix.
lc = [u'Date',
      u'Time',
      u'Local_Authority_(Highway)',
      u'LSOA_of_Accident_Location']

# The target itself plus the identifier / reference columns are not features.
id_and_target_columns = ['Casualty_Severity',
                         'Accident_Index',
                         'Vehicle_Reference_x',
                         'Vehicle_Reference_y',
                         'Casualty_Reference']

# One drop call with an explicit axis keyword: the positional form
# ``drop(name, 1)`` is rejected by pandas 2.x.
X = merged5.drop(id_and_target_columns + lc, axis=1)

# Replace NaN cells with the mean of their column.
# NOTE(review): the data also contains -1 values (e.g. in Age_of_Driver) that
# look like a missing-value code; fillna does not touch those -- confirm.
# NOTE(review): Accident_Severity stays in X and ends up with by far the
# largest coefficient; it may be derived from the casualty severities, which
# would leak the target -- confirm against the data dictionary.
X = X.fillna(X.mean())

Y = merged5['Casualty_Severity']

In [66]:

# Show the feature matrix; the rendered output is abbreviated by pandas
# (middle rows and columns are elided).
X

Out[66]:

1st_Point_of_Impact 1st_Road_Class 1st_Road_Number 2nd_Road_Class 2nd_Road_Number Accident_Severity Age_Band_of_Casualty Age_Band_of_Driver Age_of_Casualty Age_of_Driver … Special_Conditions_at_Site Speed_limit Towing_and_Articulation Urban_or_Rural_Area Vehicle_Leaving_Carriageway Vehicle_Location-Restricted_Lane Vehicle_Manoeuvre Vehicle_Type Was_Vehicle_Left_Hand_Drive? Weather_Conditions
0 1 3 3217 6 0 2 7 8 36.454136 36.324775 … 0 30 0 1 0 0 6 8 1 1
1 1 3 3217 6 0 2 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 1 1 1
2 1 3 3218 3 3218 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 3 1 8
3 1 3 3218 3 3218 3 4 6 36.454136 36.324775 … 0 30 0 1 0 0 18 3 1 8
4 0 4 450 4 412 3 10 8 36.454136 36.324775 … 0 30 0 1 0 0 14 11 1 1
5 4 4 450 5 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 4 3 1 1
6 0 4 450 5 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 1
7 2 3 3220 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 14 9 1 1
8 1 3 3220 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 1
9 1 3 3217 6 0 3 6 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 2
10 3 3 4 6 0 3 7 9 36.454136 36.324775 … 0 30 0 1 0 0 6 8 1 1
11 1 3 4 6 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 5 1 1
12 1 5 0 3 3218 3 9 5 36.454136 36.324775 … 0 30 0 1 0 0 7 1 1 1
13 0 3 3220 3 3220 3 4 -1 36.454136 36.324775 … 0 30 0 1 0 0 16 9 1 1
14 4 3 315 6 0 3 6 7 36.454136 36.324775 … 0 30 0 1 0 0 9 9 1 1
15 1 3 315 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 1 1 1
16 3 3 3220 -1 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 6 5 1 1
17 1 3 3220 -1 0 3 6 8 36.454136 36.324775 … 0 30 0 1 0 0 18 19 1 1
18 3 3 315 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 18 1 1 2
19 1 3 315 6 0 3 6 -1 36.454136 36.324775 … 0 30 0 1 0 0 9 9 1 2
20 3 6 0 6 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
21 3 6 0 6 0 3 -1 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
22 1 6 0 6 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 19 1 3
23 1 6 0 6 0 3 -1 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 19 1 3
24 2 6 0 6 0 3 6 6 36.454136 36.324775 … 0 30 0 1 0 0 4 3 1 1
25 1 6 0 6 0 3 6 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 1
26 3 3 3212 -1 0 3 7 7 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
27 1 3 3212 -1 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 18 9 1 3
28 1 3 3212 -1 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 2 9 1 3
29 1 3 3212 -1 0 3 7 -1 36.454136 36.324775 … 0 30 0 1 0 0 2 9 1 3
… … … … … … … … … … … … … … … … … … … … … …
319897 1 2 74 -1 0 3 6 9 29.000000 57.000000 … 0 70 0 2 0 0 18 9 1 3
319898 1 2 74 -1 0 3 4 9 17.000000 57.000000 … 0 70 0 2 0 0 18 9 1 3
319899 2 2 74 -1 0 3 7 4 36.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319900 2 2 74 -1 0 3 6 4 30.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319901 2 2 74 -1 0 3 1 4 0.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319902 2 2 74 -1 0 3 5 4 25.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319903 2 2 74 -1 0 3 4 4 19.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319904 2 2 74 -1 0 3 6 4 29.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319905 2 2 74 -1 0 3 4 4 17.000000 19.000000 … 0 70 0 2 1 0 18 9 1 3
319906 2 2 74 -1 0 3 7 -1 36.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319907 2 2 74 -1 0 3 6 -1 30.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319908 2 2 74 -1 0 3 1 -1 0.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319909 2 2 74 -1 0 3 5 -1 25.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319910 2 2 74 -1 0 3 4 -1 19.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319911 2 2 74 -1 0 3 6 -1 29.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319912 2 2 74 -1 0 3 4 -1 17.000000 -1.000000 … 0 70 0 2 1 0 18 9 1 3
319913 2 2 74 -1 0 3 7 6 36.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319914 2 2 74 -1 0 3 6 6 30.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319915 2 2 74 -1 0 3 1 6 0.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319916 2 2 74 -1 0 3 5 6 25.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319917 2 2 74 -1 0 3 4 6 19.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319918 2 2 74 -1 0 3 6 6 29.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319919 2 2 74 -1 0 3 4 6 17.000000 29.000000 … 0 70 0 2 0 0 18 9 1 3
319920 1 2 74 -1 0 3 7 4 36.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319921 1 2 74 -1 0 3 6 4 30.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319922 1 2 74 -1 0 3 1 4 0.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319923 1 2 74 -1 0 3 5 4 25.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319924 1 2 74 -1 0 3 4 4 19.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319925 1 2 74 -1 0 3 6 4 29.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3
319926 1 2 74 -1 0 3 4 4 17.000000 17.000000 … 0 70 0 2 5 0 18 9 1 3

1055110 rows × 58 columns

In [67]:

# Show the target series (the Casualty_Severity column of the merged frame).
Y

Out[67]:

0 2
1 2
2 3
3 3
4 3
5 3
6 3
7 3
8 3
9 3
10 3
11 3
12 3
13 3
14 3
15 3
16 3
17 3
18 3
19 3
20 3
21 3
22 3
23 3
24 3
25 3
26 3
27 3
28 3
29 3
..
319897 3
319898 3
319899 3
319900 3
319901 3
319902 3
319903 3
319904 3
319905 3
319906 3
319907 3
319908 3
319909 3
319910 3
319911 3
319912 3
319913 3
319914 3
319915 3
319916 3
319917 3
319918 3
319919 3
319920 3
319921 3
319922 3
319923 3
319924 3
319925 3
319926 3
Name: Casualty_Severity, dtype: int64

In [58]:

# Number of rows per severity code 1, 2 and 3. The comparison is reduced with
# the vectorised Series.sum() instead of iterating the series in Python.
print([int((Y == level).sum()) for level in (1, 2, 3)])
# Share of rows with severity 3, i.e. the accuracy obtained by always
# predicting the majority class.
print((Y == 3).mean())

[9095, 111143, 934872]
0.886042213608

In [60]:

# Logistic regression on casualty severity. The class weights up-weight the
# rare severity codes 1 and 2 relative to the dominant code 3; C=1e5 means
# very weak regularisation (C is the inverse regularisation strength).
logreg = linear_model.LogisticRegression(C=1e5, class_weight={1: 1000, 2: 10, 3: 1})
logreg.fit(X, Y)

# NOTE: predictions are made on the same rows the model was fitted on, so the
# accuracy printed below is in-sample, not an estimate of held-out accuracy.
res = logreg.predict(X)

# Predicted rows per severity code, then the fraction of rows predicted
# correctly. Both use vectorised reductions rather than builtin sum().
print([int((res == level).sum()) for level in (1, 2, 3)])
print((res == Y).mean())

[1274, 120124, 933712]
0.939066068941

In [71]:

# Rough per-feature weight: absolute coefficients summed over the rows of
# coef_ (one value per feature column).
t = abs(logreg.coef_)
cosum = t.sum(axis=0)

model_coefficient = pd.Series(cosum, index=X.columns)

# Side-by-side table of true and predicted severity, written to compare.csv.
# The column order is pinned so the file layout does not depend on how the
# installed pandas version orders dict keys.
outD = pd.DataFrame({"pred": res, "correct": Y}, columns=["correct", "pred"])
outD.to_csv("compare.csv", index=False, header=True)

In [72]:

# Ten features with the largest summed absolute coefficients, largest first.
coefficients_desc = model_coefficient.sort_values(ascending=False)
coefficients_desc.iloc[:10]

Out[72]:

Accident_Severity 7.743353
Sex_of_Casualty 0.457001
Did_Police_Officer_Attend_Scene_of_Accident 0.363090
Latitude 0.338320
Car_Passenger 0.317477
Urban_or_Rural_Area 0.144427
Age_Band_of_Casualty 0.137477
Pedestrian_Location 0.134923
Road_Type 0.121066
Bus_or_Coach_Passenger 0.117226
dtype: float64

In [68]:

# Show the true-vs-predicted comparison table built from Y and res.
outD

Out[68]:

correct pred
0 2 2
1 2 2
2 3 3
3 3 3
4 3 3
5 3 3
6 3 3
7 3 3
8 3 3
9 3 3
10 3 3
11 3 3
12 3 3
13 3 3
14 3 3
15 3 3
16 3 3
17 3 3
18 3 3
19 3 3
20 3 3
21 3 3
22 3 3
23 3 3
24 3 3
25 3 3
26 3 3
27 3 3
28 3 3
29 3 3
… … …
319897 3 3
319898 3 3
319899 3 3
319900 3 3
319901 3 3
319902 3 3
319903 3 3
319904 3 3
319905 3 3
319906 3 3
319907 3 3
319908 3 3
319909 3 3
319910 3 3
319911 3 3
319912 3 3
319913 3 3
319914 3 3
319915 3 3
319916 3 3
319917 3 3
319918 3 3
319919 3 3
319920 3 3
319921 3 3
319922 3 3
319923 3 3
319924 3 3
319925 3 3
319926 3 3

1055110 rows × 2 columns

In [ ]: