程序代写代做代考 ml-accident-checkpoint

ml-accident-checkpoint

In [1]:

import numpy as np
import pandas as pd
from scipy.stats import skew
from sklearn.linear_model import LassoCV
import sklearn.cross_validation as cv
import matplotlib.pyplot as plt

In [31]:

acci5 = pd.read_csv(“/Users/vagrant/tasks-2017/qq-2363548732/ml-data-mining-9500/accident/DfTRoadSafety_Accidents_2015.csv”, encoding=”utf-8-sig”)

In [32]:

acci5

Out[32]:

Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date … Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
0 201501BS70001 525130.0 180050.0 -0.198465 51.505538 1 3 1 1 12/01/2015 … 0 0 4 1 1 0 0 1 1 E01002825
1 201501BS70002 526530.0 178560.0 -0.178838 51.491836 1 3 1 1 12/01/2015 … 0 0 1 1 1 0 0 1 1 E01002820
2 201501BS70004 524610.0 181080.0 -0.205590 51.514910 1 3 1 1 12/01/2015 … 0 1 4 2 2 0 0 1 1 E01002833
3 201501BS70005 524420.0 181080.0 -0.208327 51.514952 1 3 1 1 13/01/2015 … 0 0 1 1 2 0 0 1 2 E01002874
4 201501BS70008 524630.0 179040.0 -0.206022 51.496572 1 2 2 1 09/01/2015 … 0 5 1 2 2 0 0 1 2 E01002814
5 201501BS70009 525480.0 179530.0 -0.193610 51.500788 1 3 2 1 15/01/2015 … 0 4 1 1 2 0 0 1 1 E01002816
6 201501BS70010 526890.0 178940.0 -0.173519 51.495171 1 3 2 1 15/01/2015 … 0 5 1 8 2 0 0 1 1 E01002821
7 201501BS70011 527590.0 178660.0 -0.163542 51.492497 1 3 2 1 18/01/2015 … 0 0 1 1 1 0 0 1 1 E01002861
8 201501BS70012 524170.0 180930.0 -0.211980 51.513659 1 3 2 1 16/01/2015 … 0 1 1 1 1 0 0 1 1 E01002879
9 201501BS70013 525010.0 181200.0 -0.199786 51.515900 1 3 2 2 20/01/2015 … 0 0 1 8 1 0 0 1 1 E01002832
10 201501BS70014 523850.0 181450.0 -0.216407 51.518402 1 3 2 1 23/01/2015 … 0 0 4 2 2 0 0 1 1 E01002909
11 201501BS70015 526110.0 179650.0 -0.184495 51.501726 1 3 1 2 22/01/2015 … 0 5 1 1 1 0 0 1 1 E01002891
12 201501BS70016 526970.0 178510.0 -0.172522 51.491289 1 3 2 1 21/01/2015 … 0 0 4 1 1 0 0 1 1 E01002914
13 201501BS70017 525350.0 178640.0 -0.195797 51.492818 1 3 1 1 21/01/2015 … 0 0 1 1 1 0 0 1 1 E01002847
14 201501BS70018 527370.0 178170.0 -0.166886 51.488143 1 3 1 1 23/01/2015 … 0 0 1 1 1 0 0 1 1 E01002899
15 201501BS70019 527370.0 178180.0 -0.166882 51.488233 1 3 1 1 23/01/2015 … 0 0 4 1 1 0 0 1 1 E01002899
16 201501BS70020 524020.0 181050.0 -0.214099 51.514770 1 3 2 1 23/01/2015 … 0 0 1 1 1 0 0 1 1 E01002878
17 201501BS70022 526200.0 177720.0 -0.183889 51.484361 1 3 2 1 25/01/2015 … 0 5 4 1 1 0 0 1 1 E01002912
18 201501BS70023 524450.0 181560.0 -0.207725 51.519259 1 3 1 1 24/01/2015 … 0 0 4 1 1 0 0 1 1 E01002855
19 201501BS70024 526940.0 178480.0 -0.172964 51.491026 1 3 2 1 23/01/2015 … 0 0 1 1 1 0 0 1 1 E01002914
20 201501BS70026 527600.0 179480.0 -0.163102 51.499864 1 2 1 1 23/01/2015 … 0 4 4 2 2 0 0 1 1 E01002819
21 201501BS70027 525560.0 178170.0 -0.192941 51.488548 1 3 1 1 20/01/2015 … 0 5 4 1 1 0 0 1 2 E01002846
22 201501BS70029 525510.0 178130.0 -0.193675 51.488199 1 3 2 1 20/01/2015 … 0 0 1 1 1 0 0 1 2 E01002894
23 201501BS70030 523910.0 181390.0 -0.215564 51.517850 1 3 2 2 28/01/2015 … 0 0 1 2 2 0 0 1 1 E01002878
24 201501BS70033 526090.0 177600.0 -0.185515 51.483307 1 3 2 2 30/01/2015 … 0 5 4 1 1 0 0 1 1 E01002912
25 201501BS70034 527560.0 179160.0 -0.163793 51.496997 1 3 1 1 29/01/2015 … 0 5 4 1 1 0 0 1 1 E01002818
26 201501BS70035 527950.0 178890.0 -0.158276 51.494483 1 3 1 1 29/01/2015 … 0 1 4 1 1 0 0 1 1 E01002859
27 201501BS70036 525390.0 180190.0 -0.194671 51.506739 1 3 1 1 30/01/2015 … 0 0 1 1 1 0 0 1 1 E01002827
28 201501BS70037 527350.0 177640.0 -0.167365 51.483385 1 3 2 1 01/02/2015 … 0 5 1 1 2 0 0 1 1 E01002844
29 201501BS70038 527340.0 178160.0 -0.167321 51.488060 1 3 1 1 30/01/2015 … 0 0 4 1 1 0 0 1 1 E01002899
… … … … … … … … … … … … … … … … … … … … … …
140056 2015984126215 338953.0 576296.0 -2.957692 55.077345 98 3 3 2 17/08/2015 … 0 0 1 1 1 0 0 2 1 NaN
140057 2015984126815 317327.0 566149.0 -3.293388 54.983052 98 3 2 2 21/08/2015 … 0 0 1 1 1 0 0 2 1 NaN
140058 2015984126915 318128.0 575574.0 -3.283579 55.067857 98 3 2 3 24/08/2015 … 0 0 6 2 5 0 0 2 1 NaN
140059 2015984127315 320333.0 567053.0 -3.246680 54.991664 98 3 2 1 17/08/2015 … 0 0 1 1 1 0 0 2 1 NaN
140060 2015984127515 319848.0 566515.0 -3.254108 54.986752 98 3 2 1 23/08/2015 … 0 0 1 1 1 0 0 2 2 NaN
140061 2015984127615 338620.0 595149.0 -2.966998 55.246691 98 3 1 1 27/08/2015 … 0 0 1 2 2 0 0 2 1 NaN
140062 2015984128215 327982.0 570650.0 -3.128065 55.025147 98 1 1 2 30/08/2015 … 0 0 1 1 1 0 0 2 1 NaN
140063 2015984128515 318980.0 575210.0 -3.270137 55.064727 98 2 1 1 04/09/2015 … 0 0 1 1 1 0 0 2 1 NaN
140064 2015984129115 309715.0 598908.0 -3.422724 55.276010 98 3 2 1 09/09/2015 … 0 0 6 1 1 0 0 2 1 NaN
140065 2015984129715 330766.0 567430.0 -3.083745 54.996613 98 3 2 1 10/09/2015 … 0 0 1 1 1 0 0 2 1 NaN
140066 2015984130315 312312.0 605860.0 -3.384031 55.338930 98 3 2 1 27/09/2015 … 0 0 1 1 1 0 0 2 2 NaN
140067 2015984130815 310460.0 604532.0 -3.412795 55.326666 98 2 1 1 03/10/2015 … 0 0 1 1 1 0 0 2 1 NaN
140068 2015984131215 328774.0 567001.0 -3.114769 54.992477 98 3 1 1 27/09/2015 … 0 0 6 1 1 0 0 2 1 NaN
140069 2015984132215 320174.0 566497.0 -3.249009 54.986643 98 3 1 1 20/10/2015 … 0 0 1 1 1 0 0 2 1 NaN
140070 2015984132315 325193.0 568986.0 -3.171246 55.009785 98 3 2 2 23/10/2015 … 0 0 1 1 1 0 0 2 1 NaN
140071 2015984132815 315619.0 578498.0 -3.323723 55.093705 98 2 2 1 02/11/2015 … 0 0 1 1 1 0 0 2 1 NaN
140072 2015984133015 319506.0 566586.0 -3.259471 54.987335 98 3 2 1 04/11/2015 … 0 4 4 1 2 0 0 2 1 NaN
140073 2015984133115 308738.0 579979.0 -3.431983 55.105789 98 3 1 1 04/11/2015 … 0 0 6 1 2 0 0 2 1 NaN
140074 2015984133815 314076.0 578925.0 -3.348023 55.097276 98 2 3 4 09/11/2015 … 0 0 1 2 2 0 0 2 1 NaN
140075 2015984134815 310142.0 596869.0 -3.415353 55.257772 98 3 1 1 28/10/2015 … 0 0 6 1 1 0 7 2 1 NaN
140076 2015984135815 338928.0 576364.0 -2.958098 55.077953 98 3 2 1 20/11/2015 … 0 0 6 2 2 0 0 2 1 NaN
140077 2015984136815 324728.0 566339.0 -3.177818 54.985933 98 3 1 2 24/11/2015 … 0 0 4 1 2 0 0 2 1 NaN
140078 2015984137515 327369.0 566993.0 -3.136722 54.992202 98 3 2 1 01/12/2015 … 0 0 6 4 2 0 0 2 1 NaN
140079 2015984137615 319301.0 566593.0 -3.262676 54.987365 98 3 2 1 02/12/2015 … 0 5 4 2 2 0 0 2 1 NaN
140080 2015984139015 304440.0 580166.0 -3.499388 55.106659 98 3 1 1 13/12/2015 … 0 0 6 7 4 0 0 2 2 NaN
140081 2015984139115 312087.0 570791.0 -3.376671 55.023855 98 3 3 1 11/12/2015 … 0 0 1 1 2 0 0 2 1 NaN
140082 2015984139715 320671.0 569791.0 -3.242159 55.016316 98 3 2 1 02/12/2015 … 0 0 1 1 2 0 0 2 1 NaN
140083 2015984140215 311731.0 586343.0 -3.387067 55.163502 98 2 1 4 23/12/2015 … 0 0 6 4 2 0 0 2 1 NaN
140084 2015984140515 328273.0 570137.0 -3.123385 55.020580 98 3 3 3 26/12/2015 … 0 0 1 2 2 0 0 2 1 NaN
140085 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 0 0 6 3 4 0 0 2 1 NaN

140086 rows × 32 columns

In [33]:

casul5 = pd.read_csv(“/Users/vagrant/tasks-2017/qq-2363548732/ml-data-mining-9500/accident/DfTRoadSafety_Casualties_2015.csv”, encoding=”utf-8-sig”)

In [34]:

vehicle5 = pd.read_csv(“/Users/vagrant/tasks-2017/qq-2363548732/ml-data-mining-9500/accident/DfTRoadSafety_Vehicles_2015.csv”, encoding=”utf-8-sig”)

In [35]:

casul5

Out[35]:

Accident_Index Vehicle_Reference Casualty_Reference Casualty_Class Sex_of_Casualty Age_of_Casualty Age_Band_of_Casualty Casualty_Severity Pedestrian_Location Pedestrian_Movement Car_Passenger Bus_or_Coach_Passenger Pedestrian_Road_Maintenance_Worker Casualty_Type Casualty_Home_Area_Type
0 201501BS70001 1 1 3 2 41 7 3 5 1 0 0 2 0 -1
1 201501BS70002 1 1 3 1 24 5 3 9 9 0 0 2 0 1
2 201501BS70004 1 1 3 2 33 6 3 1 3 0 0 2 0 1
3 201501BS70005 1 1 3 2 9 2 3 5 1 0 0 2 0 1
4 201501BS70008 1 1 1 1 48 8 2 0 0 0 0 0 1 1
5 201501BS70009 1 1 1 1 37 7 3 0 0 0 0 0 3 1
6 201501BS70010 2 1 1 1 37 7 3 0 0 0 0 0 1 1
7 201501BS70011 1 1 1 2 85 11 3 0 0 0 0 0 9 1
8 201501BS70012 2 1 1 1 61 9 3 0 0 0 0 0 5 1
9 201501BS70013 1 1 1 1 41 7 3 0 0 0 0 0 8 1
10 201501BS70013 2 2 1 1 46 8 3 0 0 0 0 0 8 1
11 201501BS70014 1 1 1 1 30 6 3 0 0 0 0 0 9 1
12 201501BS70015 1 1 1 1 39 7 3 0 0 0 0 0 1 1
13 201501BS70015 1 2 3 1 26 6 3 4 1 0 0 2 0 1
14 201501BS70016 2 1 1 1 31 6 3 0 0 0 0 0 1 1
15 201501BS70017 1 1 3 1 25 5 3 5 1 0 0 2 0 1
16 201501BS70018 1 1 3 2 59 9 3 6 9 0 0 2 0 1
17 201501BS70019 1 1 2 1 87 11 3 0 0 0 3 0 11 1
18 201501BS70020 2 1 1 1 41 7 3 0 0 0 0 0 1 1
19 201501BS70022 1 1 1 1 29 6 3 0 0 0 0 0 4 1
20 201501BS70023 1 1 3 1 36 7 3 9 5 0 0 2 0 1
21 201501BS70024 1 1 1 1 43 7 3 0 0 0 0 0 5 -1
22 201501BS70026 1 1 3 1 58 9 2 4 1 0 0 2 0 1
23 201501BS70027 1 1 3 2 55 8 3 1 1 0 0 2 0 1
24 201501BS70029 1 1 1 1 47 8 3 0 0 0 0 0 1 1
25 201501BS70030 1 1 1 1 58 9 3 0 0 0 0 0 19 1
26 201501BS70030 2 2 1 2 23 5 3 0 0 0 0 0 9 1
27 201501BS70033 1 1 1 1 35 6 3 0 0 0 0 0 8 1
28 201501BS70033 2 2 1 1 41 7 3 0 0 0 0 0 8 1
29 201501BS70034 1 1 3 2 52 8 3 1 1 0 0 2 0 1
… … … … … … … … … … … … … … … …
186179 2015984132815 1 1 1 1 34 6 2 0 0 0 0 0 9 -1
186180 2015984133015 1 1 1 1 27 6 3 0 0 0 0 0 3 3
186181 2015984133115 1 1 1 2 20 4 3 0 0 0 0 0 9 3
186182 2015984133815 1 1 1 2 72 10 2 0 0 0 0 0 9 2
186183 2015984133815 2 2 1 1 61 9 3 0 0 0 0 0 10 2
186184 2015984133815 2 3 2 2 17 4 2 0 0 1 0 0 10 3
186185 2015984133815 2 4 2 2 17 4 2 0 0 2 0 0 10 2
186186 2015984134815 1 1 1 1 25 5 3 0 0 0 0 0 3 3
186187 2015984135815 2 1 1 1 56 9 3 0 0 0 0 0 1 3
186188 2015984136815 1 1 3 2 13 3 3 8 1 0 0 0 0 3
186189 2015984136815 1 2 3 2 13 3 3 8 1 0 0 0 0 3
186190 2015984137515 2 1 1 1 62 9 3 0 0 0 0 0 9 2
186191 2015984137615 2 1 1 1 46 8 3 0 0 0 0 0 8 2
186192 2015984139015 1 1 1 1 23 5 3 0 0 0 0 0 3 2
186193 2015984139115 1 1 1 1 38 7 3 0 0 0 0 0 19 2
186194 2015984139715 2 1 1 2 24 5 3 0 0 0 0 0 9 3
186195 2015984140215 1 1 1 1 22 5 3 0 0 0 0 0 9 -1
186196 2015984140215 1 2 2 1 31 6 2 0 0 2 0 0 9 -1
186197 2015984140215 1 3 2 1 24 5 3 0 0 1 0 0 9 -1
186198 2015984140215 1 4 2 1 25 5 2 0 0 2 0 0 9 -1
186199 2015984140515 1 1 1 2 75 10 3 0 0 0 0 0 9 3
186200 2015984140515 2 2 1 1 20 4 3 0 0 0 0 0 9 1
186201 2015984140515 2 3 2 2 17 4 3 0 0 2 0 0 9 1
186202 2015984141415 5 5 1 1 36 7 3 0 0 0 0 0 9 1
186203 2015984141415 5 6 2 2 30 6 3 0 0 1 0 0 9 1
186204 2015984141415 5 7 2 2 0 1 3 0 0 2 0 0 9 1
186205 2015984141415 7 4 1 1 25 5 3 0 0 0 0 0 9 1
186206 2015984141415 10 3 1 1 19 4 3 0 0 0 0 0 9 2
186207 2015984141415 12 2 1 1 29 6 3 0 0 0 0 0 9 3
186208 2015984141415 13 1 1 2 17 4 3 0 0 0 0 0 9 1

186209 rows × 15 columns

In [36]:

vehicle5

Out[36]:

Accident_Index Vehicle_Reference Vehicle_Type Towing_and_Articulation Vehicle_Manoeuvre Vehicle_Location-Restricted_Lane Junction_Location Skidding_and_Overturning Hit_Object_in_Carriageway Vehicle_Leaving_Carriageway … Was_Vehicle_Left_Hand_Drive? Journey_Purpose_of_Driver Sex_of_Driver Age_of_Driver Age_Band_of_Driver Engine_Capacity_(CC) Propulsion_Code Age_of_Vehicle Driver_IMD_Decile Driver_Home_Area_Type
0 201501BS70001 1 19 0 9 0 8 0 0 0 … 1 1 1 -1 -1 2143 2 4 -1 -1
1 201501BS70002 1 9 0 9 0 8 0 0 0 … 1 6 1 -1 -1 1600 1 3 -1 -1
2 201501BS70004 1 9 0 9 0 2 0 0 0 … 1 6 1 30 6 1686 2 10 -1 1
3 201501BS70005 1 9 0 9 0 2 0 0 0 … 1 6 1 -1 -1 -1 -1 -1 -1 -1
4 201501BS70008 1 1 0 18 0 8 0 0 0 … 1 2 1 48 8 -1 -1 -1 -1 1
5 201501BS70008 2 9 0 7 0 8 0 0 0 … 1 6 1 -1 -1 -1 -1 -1 -1 -1
6 201501BS70009 1 3 0 18 0 8 0 0 0 … 1 6 1 37 7 124 1 3 -1 1
7 201501BS70009 2 19 0 6 0 8 0 0 0 … 1 1 1 44 7 2402 2 5 -1 1
8 201501BS70010 1 9 0 9 0 8 0 0 0 … 1 6 1 37 7 1461 2 10 -1 1
9 201501BS70010 2 1 0 18 0 8 0 0 0 … 1 6 1 37 7 -1 -1 -1 -1 1
10 201501BS70011 1 9 0 12 0 0 0 0 0 … 1 6 2 85 11 1388 1 13 -1 1
11 201501BS70011 2 9 0 18 0 0 0 0 0 … 1 6 1 42 7 1995 2 3 -1 1
12 201501BS70012 1 9 0 13 0 8 0 0 0 … 1 6 2 39 7 1248 2 8 -1 1
13 201501BS70012 2 5 0 7 0 8 0 0 0 … 1 6 1 61 9 998 1 6 -1 1
14 201501BS70013 1 8 0 18 0 8 0 0 0 … 1 6 1 41 7 1598 2 5 -1 1
15 201501BS70013 2 8 0 9 0 8 0 0 0 … 1 1 1 46 8 1798 1 6 -1 1
16 201501BS70014 1 9 0 18 0 8 2 10 0 … 1 6 1 30 6 1461 2 11 -1 1
17 201501BS70014 2 9 0 18 0 8 0 0 0 … 1 6 3 -1 -1 -1 -1 -1 -1 -1
18 201501BS70015 1 1 0 18 0 2 0 0 0 … 1 6 1 39 7 -1 -1 -1 -1 1
19 201501BS70016 1 4 0 18 0 8 0 0 0 … 1 6 2 29 6 124 1 8 -1 1
20 201501BS70016 2 1 0 9 0 8 0 0 0 … 1 6 1 31 6 -1 -1 -1 -1 1
21 201501BS70017 1 9 0 18 0 8 0 0 0 … 1 6 3 -1 -1 -1 -1 -1 -1 -1
22 201501BS70018 1 11 0 4 0 1 0 0 0 … 1 1 1 40 7 4500 2 3 -1 1
23 201501BS70019 1 11 0 4 0 1 0 0 0 … 1 1 1 40 7 4500 2 3 -1 1
24 201501BS70020 1 9 0 17 0 8 0 0 0 … 1 6 2 47 8 -1 -1 -1 -1 1
25 201501BS70020 2 1 0 18 0 8 0 0 0 … 1 6 1 41 7 -1 -1 -1 -1 1
26 201501BS70022 1 4 0 18 0 0 0 0 0 … 1 1 1 29 6 125 1 5 -1 1
27 201501BS70022 2 8 0 6 0 0 0 0 0 … 1 1 1 36 7 998 1 1 -1 1
28 201501BS70023 1 9 0 18 0 0 0 0 0 … 1 6 1 -1 -1 2435 1 14 -1 1
29 201501BS70024 1 5 0 18 0 8 0 0 0 … 1 6 1 43 7 680 1 7 -1 -1
… … … … … … … … … … … … … … … … … … … … … …
257869 2015984135815 1 9 0 18 0 1 0 0 0 … 1 6 1 74 10 2179 2 2 -1 3
257870 2015984135815 2 1 0 7 0 5 0 0 0 … 1 6 1 56 9 -1 -1 -1 -1 3
257871 2015984136815 1 9 0 9 0 1 0 0 0 … 1 6 2 50 8 1989 1 12 -1 3
257872 2015984137515 1 9 0 18 0 0 0 0 0 … 1 2 1 37 7 1229 1 6 -1 3
257873 2015984137515 2 9 0 18 0 0 0 0 1 … 1 6 1 62 9 1248 2 3 -1 2
257874 2015984137615 1 20 0 18 0 0 0 0 0 … 1 1 1 32 6 -1 -1 -1 -1 1
257875 2015984137615 2 8 0 18 0 0 0 0 0 … 1 1 1 46 8 1896 2 7 -1 2
257876 2015984139015 1 3 0 18 0 0 1 0 1 … 1 2 1 23 5 125 1 -1 -1 2
257877 2015984139115 1 19 0 18 0 0 1 0 0 … 1 2 1 38 7 1461 2 7 -1 2
257878 2015984139115 2 9 0 18 0 0 1 0 2 … 1 2 2 35 6 1598 2 1 -1 -1
257879 2015984139115 3 9 0 18 0 0 0 0 0 … 1 6 1 65 9 2179 2 1 -1 -1
257880 2015984139715 1 9 0 9 0 6 0 0 0 … 1 6 1 36 7 1560 2 1 -1 3
257881 2015984139715 2 9 0 18 0 8 1 0 7 … 1 6 2 24 5 1199 1 11 -1 3
257882 2015984140215 1 9 0 18 0 0 2 0 1 … 1 6 1 22 5 2926 2 12 -1 -1
257883 2015984140515 1 9 0 13 0 1 0 0 0 … 1 6 2 75 10 1490 1 7 -1 3
257884 2015984140515 2 9 0 7 0 7 0 0 0 … 1 6 1 20 4 1242 1 4 -1 1
257885 2015984140515 3 9 0 18 0 8 0 0 0 … 1 6 2 32 6 1598 2 1 -1 1
257886 2015984141415 1 9 0 18 0 0 1 0 1 … 1 6 1 34 6 2149 2 9 -1 1
257887 2015984141415 2 9 0 18 0 0 1 0 1 … 1 6 2 33 6 1596 1 13 -1 1
257888 2015984141415 3 9 0 18 0 0 1 0 1 … 1 6 1 23 5 2199 2 4 -1 2
257889 2015984141415 4 9 0 18 0 0 1 0 5 … 1 6 1 53 8 2993 2 -1 -1 3
257890 2015984141415 5 9 0 18 0 0 1 0 0 … 1 6 1 36 7 1989 1 18 -1 1
257891 2015984141415 6 9 0 18 0 0 1 0 0 … 1 6 1 26 6 1783 1 13 -1 1
257892 2015984141415 7 9 0 18 0 0 1 0 1 … 1 6 1 25 5 1968 2 -1 -1 1
257893 2015984141415 8 9 0 18 0 0 1 0 1 … 1 6 1 47 8 1997 2 1 -1 1
257894 2015984141415 9 9 0 18 0 0 1 0 0 … 1 6 1 57 9 1242 1 1 -1 1
257895 2015984141415 10 9 0 18 0 0 1 0 1 … 1 6 1 19 4 1422 2 -1 -1 2
257896 2015984141415 11 9 0 18 0 0 1 0 1 … 1 6 1 -1 -1 1596 1 5 -1 1
257897 2015984141415 12 9 0 18 0 0 1 0 0 … 1 6 1 29 6 1596 1 15 -1 3
257898 2015984141415 13 9 0 18 0 0 0 0 5 … 1 6 2 17 4 1199 1 12 -1 1

257899 rows × 22 columns

In [37]:

vehicle5[1:5]

Out[37]:

Accident_Index Vehicle_Reference Vehicle_Type Towing_and_Articulation Vehicle_Manoeuvre Vehicle_Location-Restricted_Lane Junction_Location Skidding_and_Overturning Hit_Object_in_Carriageway Vehicle_Leaving_Carriageway … Was_Vehicle_Left_Hand_Drive? Journey_Purpose_of_Driver Sex_of_Driver Age_of_Driver Age_Band_of_Driver Engine_Capacity_(CC) Propulsion_Code Age_of_Vehicle Driver_IMD_Decile Driver_Home_Area_Type
1 201501BS70002 1 9 0 9 0 8 0 0 0 … 1 6 1 -1 -1 1600 1 3 -1 -1
2 201501BS70004 1 9 0 9 0 2 0 0 0 … 1 6 1 30 6 1686 2 10 -1 1
3 201501BS70005 1 9 0 9 0 2 0 0 0 … 1 6 1 -1 -1 -1 -1 -1 -1 -1
4 201501BS70008 1 1 0 18 0 8 0 0 0 … 1 2 1 48 8 -1 -1 -1 -1 1

4 rows × 22 columns

In [38]:

acci5.columns

vehicle5[1:5][‘Accident_Index’]

Out[38]:

1 201501BS70002
2 201501BS70004
3 201501BS70005
4 201501BS70008
Name: Accident_Index, dtype: object

In [40]:

merged5 = pd.merge(acci5, vehicle5, on=’Accident_Index’, how=’inner’)
merged5 = pd.merge(merged5, casul5, on=’Accident_Index’, how=’inner’)

In [41]:

merged5

Out[41]:

Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date … Age_of_Casualty Age_Band_of_Casualty Casualty_Severity Pedestrian_Location Pedestrian_Movement Car_Passenger Bus_or_Coach_Passenger Pedestrian_Road_Maintenance_Worker Casualty_Type Casualty_Home_Area_Type
0 201501BS70001 525130.0 180050.0 -0.198465 51.505538 1 3 1 1 12/01/2015 … 41 7 3 5 1 0 0 2 0 -1
1 201501BS70002 526530.0 178560.0 -0.178838 51.491836 1 3 1 1 12/01/2015 … 24 5 3 9 9 0 0 2 0 1
2 201501BS70004 524610.0 181080.0 -0.205590 51.514910 1 3 1 1 12/01/2015 … 33 6 3 1 3 0 0 2 0 1
3 201501BS70005 524420.0 181080.0 -0.208327 51.514952 1 3 1 1 13/01/2015 … 9 2 3 5 1 0 0 2 0 1
4 201501BS70008 524630.0 179040.0 -0.206022 51.496572 1 2 2 1 09/01/2015 … 48 8 2 0 0 0 0 0 1 1
5 201501BS70008 524630.0 179040.0 -0.206022 51.496572 1 2 2 1 09/01/2015 … 48 8 2 0 0 0 0 0 1 1
6 201501BS70009 525480.0 179530.0 -0.193610 51.500788 1 3 2 1 15/01/2015 … 37 7 3 0 0 0 0 0 3 1
7 201501BS70009 525480.0 179530.0 -0.193610 51.500788 1 3 2 1 15/01/2015 … 37 7 3 0 0 0 0 0 3 1
8 201501BS70010 526890.0 178940.0 -0.173519 51.495171 1 3 2 1 15/01/2015 … 37 7 3 0 0 0 0 0 1 1
9 201501BS70010 526890.0 178940.0 -0.173519 51.495171 1 3 2 1 15/01/2015 … 37 7 3 0 0 0 0 0 1 1
10 201501BS70011 527590.0 178660.0 -0.163542 51.492497 1 3 2 1 18/01/2015 … 85 11 3 0 0 0 0 0 9 1
11 201501BS70011 527590.0 178660.0 -0.163542 51.492497 1 3 2 1 18/01/2015 … 85 11 3 0 0 0 0 0 9 1
12 201501BS70012 524170.0 180930.0 -0.211980 51.513659 1 3 2 1 16/01/2015 … 61 9 3 0 0 0 0 0 5 1
13 201501BS70012 524170.0 180930.0 -0.211980 51.513659 1 3 2 1 16/01/2015 … 61 9 3 0 0 0 0 0 5 1
14 201501BS70013 525010.0 181200.0 -0.199786 51.515900 1 3 2 2 20/01/2015 … 41 7 3 0 0 0 0 0 8 1
15 201501BS70013 525010.0 181200.0 -0.199786 51.515900 1 3 2 2 20/01/2015 … 46 8 3 0 0 0 0 0 8 1
16 201501BS70013 525010.0 181200.0 -0.199786 51.515900 1 3 2 2 20/01/2015 … 41 7 3 0 0 0 0 0 8 1
17 201501BS70013 525010.0 181200.0 -0.199786 51.515900 1 3 2 2 20/01/2015 … 46 8 3 0 0 0 0 0 8 1
18 201501BS70014 523850.0 181450.0 -0.216407 51.518402 1 3 2 1 23/01/2015 … 30 6 3 0 0 0 0 0 9 1
19 201501BS70014 523850.0 181450.0 -0.216407 51.518402 1 3 2 1 23/01/2015 … 30 6 3 0 0 0 0 0 9 1
20 201501BS70015 526110.0 179650.0 -0.184495 51.501726 1 3 1 2 22/01/2015 … 39 7 3 0 0 0 0 0 1 1
21 201501BS70015 526110.0 179650.0 -0.184495 51.501726 1 3 1 2 22/01/2015 … 26 6 3 4 1 0 0 2 0 1
22 201501BS70016 526970.0 178510.0 -0.172522 51.491289 1 3 2 1 21/01/2015 … 31 6 3 0 0 0 0 0 1 1
23 201501BS70016 526970.0 178510.0 -0.172522 51.491289 1 3 2 1 21/01/2015 … 31 6 3 0 0 0 0 0 1 1
24 201501BS70017 525350.0 178640.0 -0.195797 51.492818 1 3 1 1 21/01/2015 … 25 5 3 5 1 0 0 2 0 1
25 201501BS70018 527370.0 178170.0 -0.166886 51.488143 1 3 1 1 23/01/2015 … 59 9 3 6 9 0 0 2 0 1
26 201501BS70019 527370.0 178180.0 -0.166882 51.488233 1 3 1 1 23/01/2015 … 87 11 3 0 0 0 3 0 11 1
27 201501BS70020 524020.0 181050.0 -0.214099 51.514770 1 3 2 1 23/01/2015 … 41 7 3 0 0 0 0 0 1 1
28 201501BS70020 524020.0 181050.0 -0.214099 51.514770 1 3 2 1 23/01/2015 … 41 7 3 0 0 0 0 0 1 1
29 201501BS70022 526200.0 177720.0 -0.183889 51.484361 1 3 2 1 25/01/2015 … 29 6 3 0 0 0 0 0 4 1
… … … … … … … … … … … … … … … … … … … … … …
319897 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 29 6 3 0 0 0 0 0 9 3
319898 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 17 4 3 0 0 0 0 0 9 1
319899 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 36 7 3 0 0 0 0 0 9 1
319900 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 30 6 3 0 0 1 0 0 9 1
319901 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 0 1 3 0 0 2 0 0 9 1
319902 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 25 5 3 0 0 0 0 0 9 1
319903 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 19 4 3 0 0 0 0 0 9 2
319904 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 29 6 3 0 0 0 0 0 9 3
319905 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 17 4 3 0 0 0 0 0 9 1
319906 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 36 7 3 0 0 0 0 0 9 1
319907 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 30 6 3 0 0 1 0 0 9 1
319908 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 0 1 3 0 0 2 0 0 9 1
319909 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 25 5 3 0 0 0 0 0 9 1
319910 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 19 4 3 0 0 0 0 0 9 2
319911 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 29 6 3 0 0 0 0 0 9 3
319912 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 17 4 3 0 0 0 0 0 9 1
319913 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 36 7 3 0 0 0 0 0 9 1
319914 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 30 6 3 0 0 1 0 0 9 1
319915 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 0 1 3 0 0 2 0 0 9 1
319916 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 25 5 3 0 0 0 0 0 9 1
319917 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 19 4 3 0 0 0 0 0 9 2
319918 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 29 6 3 0 0 0 0 0 9 3
319919 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 17 4 3 0 0 0 0 0 9 1
319920 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 36 7 3 0 0 0 0 0 9 1
319921 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 30 6 3 0 0 1 0 0 9 1
319922 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 0 1 3 0 0 2 0 0 9 1
319923 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 25 5 3 0 0 0 0 0 9 1
319924 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 19 4 3 0 0 0 0 0 9 2
319925 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 29 6 3 0 0 0 0 0 9 3
319926 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 17 4 3 0 0 0 0 0 9 1

319927 rows × 67 columns

In [42]:

acci5.shape

Out[42]:

(140086, 32)

In [52]:

merged5[merged5[‘Accident_Index’] == ‘201501BS70008’]

Out[52]:

Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date … Age_of_Casualty Age_Band_of_Casualty Casualty_Severity Pedestrian_Location Pedestrian_Movement Car_Passenger Bus_or_Coach_Passenger Pedestrian_Road_Maintenance_Worker Casualty_Type Casualty_Home_Area_Type
4 201501BS70008 524630.0 179040.0 -0.206022 51.496572 1 2 2 1 09/01/2015 … 48 8 2 0 0 0 0 0 1 1
5 201501BS70008 524630.0 179040.0 -0.206022 51.496572 1 2 2 1 09/01/2015 … 48 8 2 0 0 0 0 0 1 1

2 rows × 67 columns

In [53]:

acci5[acci5[‘Accident_Index’] == ‘2015984141415’]

Out[53]:

Accident_Index Location_Easting_OSGR Location_Northing_OSGR Longitude Latitude Police_Force Accident_Severity Number_of_Vehicles Number_of_Casualties Date … Pedestrian_Crossing-Human_Control Pedestrian_Crossing-Physical_Facilities Light_Conditions Weather_Conditions Road_Surface_Conditions Special_Conditions_at_Site Carriageway_Hazards Urban_or_Rural_Area Did_Police_Officer_Attend_Scene_of_Accident LSOA_of_Accident_Location
140085 2015984141415 314050.0 579638.0 -3.348646 55.103676 98 3 13 7 31/12/2015 … 0 0 6 3 4 0 0 2 1 NaN

1 rows × 32 columns

In [54]:

casul5[casul5[‘Accident_Index’] == ‘2015984141415’]

Out[54]:

Accident_Index Vehicle_Reference Casualty_Reference Casualty_Class Sex_of_Casualty Age_of_Casualty Age_Band_of_Casualty Casualty_Severity Pedestrian_Location Pedestrian_Movement Car_Passenger Bus_or_Coach_Passenger Pedestrian_Road_Maintenance_Worker Casualty_Type Casualty_Home_Area_Type
186202 2015984141415 5 5 1 1 36 7 3 0 0 0 0 0 9 1
186203 2015984141415 5 6 2 2 30 6 3 0 0 1 0 0 9 1
186204 2015984141415 5 7 2 2 0 1 3 0 0 2 0 0 9 1
186205 2015984141415 7 4 1 1 25 5 3 0 0 0 0 0 9 1
186206 2015984141415 10 3 1 1 19 4 3 0 0 0 0 0 9 2
186207 2015984141415 12 2 1 1 29 6 3 0 0 0 0 0 9 3
186208 2015984141415 13 1 1 2 17 4 3 0 0 0 0 0 9 1

In [51]:

vehicle5[vehicle5[‘Accident_Index’] == ‘2015984141415’]

Out[51]:

Accident_Index Vehicle_Reference Vehicle_Type Towing_and_Articulation Vehicle_Manoeuvre Vehicle_Location-Restricted_Lane Junction_Location Skidding_and_Overturning Hit_Object_in_Carriageway Vehicle_Leaving_Carriageway … Was_Vehicle_Left_Hand_Drive? Journey_Purpose_of_Driver Sex_of_Driver Age_of_Driver Age_Band_of_Driver Engine_Capacity_(CC) Propulsion_Code Age_of_Vehicle Driver_IMD_Decile Driver_Home_Area_Type
257886 2015984141415 1 9 0 18 0 0 1 0 1 … 1 6 1 34 6 2149 2 9 -1 1
257887 2015984141415 2 9 0 18 0 0 1 0 1 … 1 6 2 33 6 1596 1 13 -1 1
257888 2015984141415 3 9 0 18 0 0 1 0 1 … 1 6 1 23 5 2199 2 4 -1 2
257889 2015984141415 4 9 0 18 0 0 1 0 5 … 1 6 1 53 8 2993 2 -1 -1 3
257890 2015984141415 5 9 0 18 0 0 1 0 0 … 1 6 1 36 7 1989 1 18 -1 1
257891 2015984141415 6 9 0 18 0 0 1 0 0 … 1 6 1 26 6 1783 1 13 -1 1
257892 2015984141415 7 9 0 18 0 0 1 0 1 … 1 6 1 25 5 1968 2 -1 -1 1
257893 2015984141415 8 9 0 18 0 0 1 0 1 … 1 6 1 47 8 1997 2 1 -1 1
257894 2015984141415 9 9 0 18 0 0 1 0 0 … 1 6 1 57 9 1242 1 1 -1 1
257895 2015984141415 10 9 0 18 0 0 1 0 1 … 1 6 1 19 4 1422 2 -1 -1 2
257896 2015984141415 11 9 0 18 0 0 1 0 1 … 1 6 1 -1 -1 1596 1 5 -1 1
257897 2015984141415 12 9 0 18 0 0 1 0 0 … 1 6 1 29 6 1596 1 15 -1 3
257898 2015984141415 13 9 0 18 0 0 0 0 5 … 1 6 2 17 4 1199 1 12 -1 1

13 rows × 22 columns

In [145]:

from sklearn import linear_model

X = merged5.drop(‘Casualty_Severity’, 1).drop(‘Accident_Index’, 1).drop(‘Vehicle_Reference_x’, 1).drop(‘Vehicle_Reference_y’,
1).drop(‘Casualty_Reference’, 1)

lc = [u’Date’,
u’Time’,
u’Local_Authority_(Highway)’,
u’LSOA_of_Accident_Location’]

for c in lc:
X = X.drop(c, 1)

len(X.columns)

Out[145]:

58

In [74]:

len(merged5.columns)

Out[74]:

67

In [75]:

Y = merged5[‘Casualty_Severity’]

In [95]:

# nx = X
# for c in X.columns:
# if X.dtypes[‘Location_Easting_OSGR’] == ‘Object’:
# print c, X.dtypes[‘Location_Easting_OSGR’]

X.columns.to_series().groupby(X.dtypes).groups

Out[95]:

{dtype(‘int64′): [u’Police_Force’,
u’Accident_Severity’,
u’Number_of_Vehicles’,
u’Number_of_Casualties’,
u’Day_of_Week’,
u’Local_Authority_(District)’,
u’1st_Road_Class’,
u’1st_Road_Number’,
u’Road_Type’,
u’Speed_limit’,
u’Junction_Detail’,
u’Junction_Control’,
u’2nd_Road_Class’,
u’2nd_Road_Number’,
u’Pedestrian_Crossing-Human_Control’,
u’Pedestrian_Crossing-Physical_Facilities’,
u’Light_Conditions’,
u’Weather_Conditions’,
u’Road_Surface_Conditions’,
u’Special_Conditions_at_Site’,
u’Carriageway_Hazards’,
u’Urban_or_Rural_Area’,
u’Did_Police_Officer_Attend_Scene_of_Accident’,
u’Vehicle_Reference_x’,
u’Vehicle_Type’,
u’Towing_and_Articulation’,
u’Vehicle_Manoeuvre’,
u’Vehicle_Location-Restricted_Lane’,
u’Junction_Location’,
u’Skidding_and_Overturning’,
u’Hit_Object_in_Carriageway’,
u’Vehicle_Leaving_Carriageway’,
u’Hit_Object_off_Carriageway’,
u’1st_Point_of_Impact’,
u’Was_Vehicle_Left_Hand_Drive?’,
u’Journey_Purpose_of_Driver’,
u’Sex_of_Driver’,
u’Age_of_Driver’,
u’Age_Band_of_Driver’,
u’Engine_Capacity_(CC)’,
u’Propulsion_Code’,
u’Age_of_Vehicle’,
u’Driver_IMD_Decile’,
u’Driver_Home_Area_Type’,
u’Vehicle_Reference_y’,
u’Casualty_Reference’,
u’Casualty_Class’,
u’Sex_of_Casualty’,
u’Age_of_Casualty’,
u’Age_Band_of_Casualty’,
u’Pedestrian_Location’,
u’Pedestrian_Movement’,
u’Car_Passenger’,
u’Bus_or_Coach_Passenger’,
u’Pedestrian_Road_Maintenance_Worker’,
u’Casualty_Type’,
u’Casualty_Home_Area_Type’],
dtype(‘float64′): [u’Location_Easting_OSGR’,
u’Location_Northing_OSGR’,
u’Longitude’,
u’Latitude’]}

In [146]:

X = X.fillna(X.mean())

In [117]:

print [sum(Y == 1), sum(Y == 2), sum(Y == 3)]
print sum(Y == 3)*1.0 / len(Y)

[2849, 33546, 283532]
0.886239673426

In [147]:

logreg = linear_model.LogisticRegression(C=1e5, class_weight = {1:100, 2:10, 3:1})
logreg.fit(X, Y)
res = logreg.predict(X)
print [sum(res == 1), sum(res == 2), sum(res == 3)]
print sum(res == Y)*1.0 / len(Y)

[30, 29298, 290599]
0.934838259978

In [148]:

sum(res == Y)*1.0 / len(Y)

Out[148]:

0.93483825997805747

In [124]:

res = logreg.predict(X)

In [126]:

sum(res == Y)

Out[126]:

298035

In [149]:

logreg.coef_
t = abs(logreg.coef_)
print t
cosum = sum(t, 0)
print cosum.shape

[[ 4.99631935e-07 2.41827807e-06 4.82759179e-02 2.40577142e-01
5.42289513e-03 8.20895039e+00 2.10503882e-01 3.37165832e-01
4.23743824e-02 5.48244360e-04 1.49709526e-02 1.18218270e-05
3.75833144e-02 8.65940209e-03 2.33776477e-02 9.63956201e-03
6.20307123e-03 1.91931776e-05 1.70377145e-02 4.10258594e-02
7.34706595e-02 7.90607358e-03 4.49933976e-02 2.38169465e-02
7.81601051e-02 7.48761623e-02 3.48525758e-01 4.23479183e-03
7.26385508e-03 1.45968768e-02 2.82251403e-02 1.61964780e-02
1.25584320e-02 1.52749866e-02 2.04215029e-02 7.76661847e-03
2.14957877e-02 1.94685755e-01 2.12599927e-03 9.09774103e-02
1.90928060e-02 1.03660524e-01 1.24152501e-05 7.06109033e-03
7.67356490e-03 4.72447325e-03 2.51940671e-03 2.29769329e-01
4.94321045e-01 5.03505351e-02 2.49369728e-01 2.65901396e-02
3.96891876e-02 4.71504818e-02 2.34603684e-01 6.34872611e-02
4.09360919e-04 1.03013645e-02]
[ 1.17284971e-08 1.91430425e-06 2.74586339e-02 2.33593763e-01
3.23300248e-04 4.56428584e+00 8.10101359e-02 9.56351577e-02
1.49179089e-02 1.87417407e-04 2.67653978e-02 5.29271406e-06
2.84585223e-02 2.08696818e-03 7.11970784e-03 2.47612249e-02
7.79757161e-03 6.80488708e-06 4.56933784e-03 2.81774131e-03
2.57781706e-02 3.01130565e-02 2.58938495e-02 1.12333186e-03
6.34587634e-03 7.52007056e-02 2.02041035e-01 8.40539269e-03
2.03448015e-02 1.82130844e-03 1.95803180e-02 9.33625658e-03
6.13955609e-02 6.21238270e-03 6.67525580e-03 1.82136773e-02
1.56305545e-02 1.14589371e-02 1.41782930e-02 4.03976109e-02
7.88561855e-04 3.77270722e-03 7.68953684e-06 4.82929085e-02
7.33748441e-03 4.64289933e-03 1.31783208e-02 4.75969517e-02
2.49204619e-01 1.20687746e-02 5.82421614e-02 4.92614110e-02
5.44960995e-02 1.89998425e-01 4.48551524e-02 4.86023672e-03
1.85457554e-02 1.17590936e-02]
[ 4.14617077e-06 6.47361568e-07 4.32430303e-07 2.74158211e-06
1.64373371e-06 1.26159182e-06 2.17810349e-07 2.00240586e-07
2.41413631e-07 2.11476325e-05 1.87525992e-07 6.88915018e-07
6.82643806e-08 3.21028973e-07 4.31704019e-07 4.50072809e-07
6.31971614e-07 4.16497541e-05 8.96471824e-10 1.24215357e-07
1.40478659e-07 1.32652900e-07 7.21264695e-08 2.27774078e-08
3.41106029e-09 1.98353775e-08 1.49322553e-07 7.26045540e-07
1.61992572e-08 9.27197949e-07 5.12809801e-08 3.85361651e-07
1.05697709e-07 1.31257362e-07 2.48764983e-07 2.97950062e-07
1.56169036e-07 5.60113241e-08 2.58853324e-07 1.62692871e-07
9.53645652e-08 9.58904906e-08 2.17111310e-05 1.17792493e-07
1.80933714e-07 5.49549602e-08 7.46458037e-08 3.03479709e-09
2.30057654e-07 1.54765261e-06 3.44302083e-08 4.12973293e-07
2.70635849e-07 1.41436149e-07 4.16266092e-08 2.08364247e-08
2.08267805e-06 2.23803175e-08]]
(58,)

In [152]:

model_coefficient = pd.Series(cosum, index=X.columns)

top20 = model_coefficient.sort_values(ascending=False).head(20)

print(model_coefficient.shape)
print(top20)

(58,)
Accident_Severity 12.773237
Sex_of_Casualty 0.743526
Did_Police_Officer_Attend_Scene_of_Accident 0.550567
Latitude 0.474174
Number_of_Casualties 0.432801
Age_Band_of_Casualty 0.307612
Number_of_Vehicles 0.291514
Bus_or_Coach_Passenger 0.279459
Casualty_Class 0.277366
Car_Passenger 0.237149
Was_Vehicle_Left_Hand_Drive? 0.206145
Urban_or_Rural_Area 0.150077
Sex_of_Driver 0.131375
Age_Band_of_Driver 0.107433
Light_Conditions 0.099249
Pedestrian_Movement 0.094186
Carriageway_Hazards 0.084506
Pedestrian_Location 0.075852
Longitude 0.075735
Skidding_and_Overturning 0.073954
dtype: float64

In [143]:

X.columns

Out[143]:

Index([u’Location_Easting_OSGR’, u’Location_Northing_OSGR’, u’Longitude’,
u’Latitude’, u’Police_Force’, u’Accident_Severity’,
u’Number_of_Vehicles’, u’Number_of_Casualties’, u’Day_of_Week’,
u’Local_Authority_(District)’, u’1st_Road_Class’, u’1st_Road_Number’,
u’Road_Type’, u’Speed_limit’, u’Junction_Detail’, u’Junction_Control’,
u’2nd_Road_Class’, u’2nd_Road_Number’,
u’Pedestrian_Crossing-Human_Control’,
u’Pedestrian_Crossing-Physical_Facilities’, u’Light_Conditions’,
u’Weather_Conditions’, u’Road_Surface_Conditions’,
u’Special_Conditions_at_Site’, u’Carriageway_Hazards’,
u’Urban_or_Rural_Area’, u’Did_Police_Officer_Attend_Scene_of_Accident’,
u’Vehicle_Reference_x’, u’Vehicle_Type’, u’Towing_and_Articulation’,
u’Vehicle_Manoeuvre’, u’Vehicle_Location-Restricted_Lane’,
u’Junction_Location’, u’Skidding_and_Overturning’,
u’Hit_Object_in_Carriageway’, u’Vehicle_Leaving_Carriageway’,
u’Hit_Object_off_Carriageway’, u’1st_Point_of_Impact’,
u’Was_Vehicle_Left_Hand_Drive?’, u’Journey_Purpose_of_Driver’,
u’Sex_of_Driver’, u’Age_of_Driver’, u’Age_Band_of_Driver’,
u’Engine_Capacity_(CC)’, u’Propulsion_Code’, u’Age_of_Vehicle’,
u’Driver_IMD_Decile’, u’Driver_Home_Area_Type’, u’Vehicle_Reference_y’,
u’Casualty_Reference’, u’Casualty_Class’, u’Sex_of_Casualty’,
u’Age_of_Casualty’, u’Age_Band_of_Casualty’, u’Pedestrian_Location’,
u’Pedestrian_Movement’, u’Car_Passenger’, u’Bus_or_Coach_Passenger’,
u’Pedestrian_Road_Maintenance_Worker’, u’Casualty_Type’,
u’Casualty_Home_Area_Type’],
dtype=’object’)

In [142]:

top10

Out[142]:

Accident_Severity 13.859734
Vehicle_Reference_y 0.988878
Was_Vehicle_Left_Hand_Drive? 0.796097
Sex_of_Casualty 0.678694
Did_Police_Officer_Attend_Scene_of_Accident 0.587250
Number_of_Casualties 0.530447
Latitude 0.508896
Bus_or_Coach_Passenger 0.403782
Number_of_Vehicles 0.387160
Car_Passenger 0.216460
Casualty_Reference 0.181001
Age_Band_of_Driver 0.160793
Pedestrian_Location 0.135368
Light_Conditions 0.131011
Carriageway_Hazards 0.123038
Road_Type 0.110670
1st_Road_Class 0.095317
Urban_or_Rural_Area 0.093129
Propulsion_Code 0.091446
Pedestrian_Crossing-Physical_Facilities 0.089807
dtype: float64

In [135]:

1.57025831e-06 + 5.81622541e-08 + 4.14616102e-06

Out[135]:

5.7745815841e-06

In [102]:

len(res)

Out[102]:

319927

In [104]:

type(res)

Out[104]:

numpy.ndarray

In [105]:

type(Y)

Out[105]:

pandas.core.series.Series

In [118]:

outD = pd.DataFrame({“pred”: res, “correct”: Y})
outD.to_csv(“compare.csv”, index=False, header=True)
outD

Out[118]:

correct pred
0 3 3
1 3 3
2 3 3
3 3 3
4 2 3
5 2 3
6 3 3
7 3 3
8 3 3
9 3 3
10 3 3
11 3 3
12 3 3
13 3 3
14 3 3
15 3 3
16 3 3
17 3 3
18 3 3
19 3 3
20 3 3
21 3 3
22 3 3
23 3 3
24 3 3
25 3 3
26 3 3
27 3 3
28 3 3
29 3 3
… … …
319897 3 3
319898 3 3
319899 3 3
319900 3 3
319901 3 3
319902 3 3
319903 3 3
319904 3 3
319905 3 3
319906 3 3
319907 3 3
319908 3 3
319909 3 3
319910 3 3
319911 3 3
319912 3 3
319913 3 3
319914 3 3
319915 3 3
319916 3 3
319917 3 3
319918 3 3
319919 3 3
319920 3 3
319921 3 3
319922 3 3
319923 3 3
319924 3 3
319925 3 3
319926 3 3

319927 rows × 2 columns

In [108]:

In [109]:

sum(res == 3)

Out[109]:

319927

In [110]:

sum(res == 1)

Out[110]:

0

In [111]:

sum(res == 2)

Out[111]:

0

In [ ]: