import numpy as np
# (Third-party watermark text removed here; it was not part of the notebook.)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.tree import plot_tree
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import KMeans
# Raise pandas' display limits so wide/long frames are printed without
# rows or columns being elided.
pd.set_option("display.max_rows", 100000)
pd.set_option("display.max_columns", 100000)
# Load the raw listings table from "listings.csv" (path is relative to the
# current working directory). The file name must be wrapped in plain ASCII
# quotes; typographic quotes are not valid Python string delimiters.
listings_raw = pd.read_csv("listings.csv")
# Preview the first rows to sanity-check the load.
listings_raw.head()
id listing_url scrape_id last_scraped name summary space description experiences_offered neighborhood_overview notes transit thumbnail_url medium_url picture_url xl_picture_url host_id host_url host_name host_since host_location host_about host_response_time host_response_rate host_acceptance_rate host_is_superhost host_thumbnail_url host_picture_url host_neighbourhood host_listings_count host_total_listings_count host_verifications host_has_profile_pic host_identity_verified street neighbourhood neighbourhood_cleansed neighbourhood_group_cleansed city state zipcode market smart_location country_code country latitude longitude is_location_exact property_type room_type accommodates bathrooms bedrooms beds bed_type amenities square_feet price weekly_price monthly_price security_deposit cleaning_fee guests_included extra_people minimum_nights maximum_nights calendar_updated has_availability availability_30 availability_60 availability_90 availability_365 calendar_last_scraped number_of_reviews first_review last_review review_scores_rating review_scores_accuracy review_scores_cleanliness review_scores_checkin review_scores_communication review_scores_location review_scores_value requires_license license jurisdiction_names instant_bookable cancellation_policy require_guest_profile_picture require_guest_phone_verification calculated_host_listings_count reviews_per_month
0 241032 https://www.airbnb.com/rooms/241032 20160104002432 4/1/2016 Stylish Queen NaN Make your self at home in this charming one-be… Make your self at home in this charming one-be… none NaN NaN NaN NaN NaN https://a1.muscache.com/ac/pictures/67560560/c… NaN 956883 https://www.airbnb.com/users/show/956883 Maija 11/8/2011 Seattle, Washington, United States I am an artist, interior designer, and run a s… within a few hours 96% 100% f https://a0.muscache.com/ac/users/956883/profil… https://a0.muscache.com/ac/users/956883/profil… Queen Anne 3.0 3.0 [’email’, ‘phone’, ‘reviews’, ‘kba’] t t W, Seattle, WA 98119, United States Queen Anne West Queen WA 98119 Seattle Seattle, WA US United States 47.636289 -122.371025 t Apartment Entire home/apt 4 1.0 1.0 1.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,”A… NaN $85.00 NaN NaN NaN NaN 2 $5.00 1 365 4 weeks ago t 14 41 71 346 4/1/2016 207 1/11/2011 2/1/2016 95.0 10.0 10.0 10.0 10.0 9.0 10.0 f NaN WASHINGTON f moderate f f 2 4.07
1 953595 https://www.airbnb.com/rooms/953595 20160104002432 4/1/2016 Bright & Airy Queen Chemically sensitive? We’ve removed the irrita… Beautiful, hypoallergenic apartment in an extr… Chemically sensitive? We’ve removed the irrita… none Queen Anne is a wonderful, truly functional vi… What’s up with the free pillows? Our home was… Convenient bus stops are just down the block, … https://a0.muscache.com/ac/pictures/14409893/f… https://a0.muscache.com/im/pictures/14409893/f… https://a0.muscache.com/ac/pictures/14409893/f… https://a0.muscache.com/ac/pictures/14409893/f… 5177328 https://www.airbnb.com/users/show/5177328 Andrea 21/2/2013 Seattle, Washington, United States Living east coast/left coast/overseas. Time i… within an hour 98% 100% t https://a0.muscache.com/ac/users/5177328/profi… https://a0.muscache.com/ac/users/5177328/profi… Queen Anne 6.0 6.0 [’email’, ‘phone’, ‘facebook’, ‘linkedin’, ‘re… t t 7th Avenue West, Seattle, WA 98119, United States Queen Anne West Queen WA 98119 Seattle Seattle, WA US United States 47.639123 -122.365667 t Apartment Entire home/apt 4 1.0 1.0 1.0 Real Bed {TV,Internet,”Wireless Internet”,Kitchen,”Free… NaN $150.00 $1,000.00 $3,000.00 $100.00 $40.00 1 $0.00 2 90 today t 13 13 16 291 4/1/2016 43 19/8/2013 29/12/2015 96.0 10.0 10.0 10.0 10.0 10.0 10.0 f NaN WASHINGTON f strict t t 6 1.48
2 3308979 https://www.airbnb.com/rooms/3308979 20160104002432 4/1/2016 House-Amazing water view New modern house built in 2013. Spectacular s… Our house is modern, light and fresh with a wa… New modern house built in 2013. Spectacular s… none Upper Queen Anne is a charming neighborhood fu… Our house is located just 5 short blocks to To… A bus stop is just 2 blocks away. Easy bus a… NaN NaN https://a2.muscache.com/ac/pictures/b4324e0f-a… NaN 16708587 https://www.airbnb.com/users/show/16708587 Jill 12/6/2014 Seattle, Washington, United States i love living in Seattle. i grew up in the mi… within a few hours 67% 100% f https://a1.muscache.com/ac/users/16708587/prof… https://a1.muscache.com/ac/users/16708587/prof… Queen Anne 2.0 2.0 [’email’, ‘phone’, ‘google’, ‘reviews’, ‘jumio’] t t West Lee Street, Seattle, WA 98119, United States Queen Anne West Queen WA 98119 Seattle Seattle, WA US United States 47.629724 -122.369483 t House Entire home/apt 11 4.5 5.0 7.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,”A… NaN $975.00 NaN NaN $1,000.00 $300.00 10 $25.00 4 30 5 weeks ago t 1 6 17 220 4/1/2016 20 30/7/2014 3/9/2015 97.0 10.0 10.0 10.0 10.0 10.0 10.0 f NaN WASHINGTON f strict f f 2 1.15
3 7421966 https://www.airbnb.com/rooms/7421966 20160104002432 4/1/2016 Queen Anne Chateau A charming apartment that sits atop Queen Anne… NaN A charming apartment that sits atop Queen Anne… none NaN NaN NaN NaN NaN https://a0.muscache.com/ac/pictures/94146944/6… NaN 9851441 https://www.airbnb.com/users/show/9851441 Emily 6/11/2013 Seattle, Washington, United States NaN NaN NaN NaN f https://a2.muscache.com/ac/users/9851441/profi… https://a2.muscache.com/ac/users/9851441/profi… Queen Anne 1.0 1.0 [’email’, ‘phone’, ‘facebook’, ‘reviews’, ‘jum… t t 8th Avenue West, Seattle, WA 98119, United States Queen Anne West Queen WA 98119 Seattle Seattle, WA US United States 47.638473 -122.369279 t Apartment Entire home/apt 3 1.0 0.0 2.0 Real Bed {Internet,”Wireless Internet”,Kitchen,”Indoor … NaN $100.00 $650.00 $2,300.00 NaN NaN 1 $0.00 1 1125 6 months ago t 0 0 0 143 4/1/2016 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN f NaN WASHINGTON f flexible f f 1 NaN
4 278830 https://www.airbnb.com/rooms/278830 20160104002432 4/1/2016 Charming craftsman 3 bdm house Cozy family craftman house in beautiful neighb… Cozy family craftman house in beautiful neighb… Cozy family craftman house in beautiful neighb… none We are in the beautiful neighborhood of Queen … Belltown The nearest public transit bus (D Line) is 2 b… NaN NaN https://a1.muscache.com/ac/pictures/6120468/b0… NaN 1452570 https://www.airbnb.com/users/show/1452570 Emily 29/11/2011 Seattle, Washington, United States Hi, I live in Seattle, Washington but I’m orig… within an hour 100% NaN f https://a0.muscache.com/ac/users/1452570/profi… https://a0.muscache.com/ac/users/1452570/profi… Queen Anne 2.0 2.0 [’email’, ‘phone’, ‘facebook’, ‘reviews’, ‘kba’] t t 14th Ave W, Seattle, WA 98119, United States Queen Anne West Queen WA 98119 Seattle Seattle, WA US United States 47.632918 -122.372471 t House Entire home/apt 6 2.0 3.0 3.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,Ki… NaN $450.00 NaN NaN $700.00 $125.00 6 $15.00 1 1125 7 weeks ago t 30 60 90 365 4/1/2016 38 10/7/2012 24/10/2015 92.0 9.0 9.0 10.0 10.0 9.0 9.0 f NaN WASHINGTON f strict f f 1 0.89
# Print a per-column summary (non-null count and dtype) of the raw listings.
listings_raw.info()
RangeIndex: 3818 entries, 0 to 3817
Data columns (total 92 columns):
# Column Non-Null Count Dtype
— —— ————– —–
0 id 3818 non-null int64
1 listing_url 3818 non-null object
2 scrape_id 3818 non-null int64
3 last_scraped 3818 non-null object
4 name 3818 non-null object
5 summary 3641 non-null object
6 space 3249 non-null object
7 description 3818 non-null object
8 experiences_offered 3818 non-null object
9 neighborhood_overview 2786 non-null object
10 notes 2212 non-null object
11 transit 2884 non-null object
12 thumbnail_url 3498 non-null object
13 medium_url 3498 non-null object
14 picture_url 3818 non-null object
15 xl_picture_url 3498 non-null object
16 host_id 3818 non-null int64
17 host_url 3818 non-null object
18 host_name 3816 non-null object
19 host_since 3816 non-null object
20 host_location 3810 non-null object
21 host_about 2959 non-null object
22 host_response_time 3295 non-null object
23 host_response_rate 3295 non-null object
24 host_acceptance_rate 3045 non-null object
25 host_is_superhost 3816 non-null object
26 host_thumbnail_url 3816 non-null object
27 host_picture_url 3816 non-null object
28 host_neighbourhood 3518 non-null object
29 host_listings_count 3816 non-null float64
30 host_total_listings_count 3816 non-null float64
31 host_verifications 3818 non-null object
32 host_has_profile_pic 3816 non-null object
33 host_identity_verified 3816 non-null object
34 street 3818 non-null object
35 neighbourhood 3402 non-null object
36 neighbourhood_cleansed 3818 non-null object
37 neighbourhood_group_cleansed 3818 non-null object
38 city 3818 non-null object
39 state 3818 non-null object
40 zipcode 3811 non-null object
41 market 3818 non-null object
42 smart_location 3818 non-null object
43 country_code 3818 non-null object
44 country 3818 non-null object
45 latitude 3818 non-null float64
46 longitude 3818 non-null float64
47 is_location_exact 3818 non-null object
48 property_type 3817 non-null object
49 room_type 3818 non-null object
50 accommodates 3818 non-null int64
51 bathrooms 3802 non-null float64
52 bedrooms 3812 non-null float64
53 beds 3817 non-null float64
54 bed_type 3818 non-null object
55 amenities 3818 non-null object
56 square_feet 97 non-null float64
57 price 3818 non-null object
58 weekly_price 2009 non-null object
59 monthly_price 1517 non-null object
60 security_deposit 1866 non-null object
61 cleaning_fee 2788 non-null object
62 guests_included 3818 non-null int64
63 extra_people 3818 non-null object
64 minimum_nights 3818 non-null int64
65 maximum_nights 3818 non-null int64
66 calendar_updated 3818 non-null object
67 has_availability 3818 non-null object
68 availability_30 3818 non-null int64
69 availability_60 3818 non-null int64
70 availability_90 3818 non-null int64
71 availability_365 3818 non-null int64
72 calendar_last_scraped 3818 non-null object
73 number_of_reviews 3818 non-null int64
74 first_review 3191 non-null object
75 last_review 3191 non-null object
76 review_scores_rating 3171 non-null float64
77 review_scores_accuracy 3160 non-null float64
78 review_scores_cleanliness 3165 non-null float64
79 review_scores_checkin 3160 non-null float64
80 review_scores_communication 3167 non-null float64
81 review_scores_location 3163 non-null float64
82 review_scores_value 3162 non-null float64
83 requires_license 3818 non-null object
84 license 0 non-null float64
85 jurisdiction_names 3818 non-null object
86 instant_bookable 3818 non-null object
87 cancellation_policy 3818 non-null object
88 require_guest_profile_picture 3818 non-null object
89 require_guest_phone_verification 3818 non-null object
90 calculated_host_listings_count 3818 non-null int64
91 reviews_per_month 3191 non-null float64
dtypes: float64(17), int64(13), object(62)
memory usage: 2.7+ MB
# Load the raw calendar table from "calendar.csv" (path is relative to the
# current working directory). The file name must be wrapped in plain ASCII
# quotes; typographic quotes are not valid Python string delimiters.
calendar_raw = pd.read_csv("calendar.csv")
# Preview the first rows to sanity-check the load.
calendar_raw.head()
listing_id date available price
0 241032 2016-01-04 t $85.00
1 241032 2016-01-05 t $85.00
2 241032 2016-01-06 f NaN
3 241032 2016-01-07 f NaN
4 241032 2016-01-08 f NaN
# Print a per-column summary (non-null count and dtype) of the raw calendar.
calendar_raw.info()
RangeIndex: 1393570 entries, 0 to 1393569
Data columns (total 4 columns):
# Column Non-Null Count Dtype
— —— ————– —–
0 listing_id 1393570 non-null int64
1 date 1393570 non-null object
2 available 1393570 non-null object
3 price 934542 non-null object
dtypes: int64(1), object(3)
memory usage: 42.5+ MB
# Load the raw reviews table from "reviews.csv" (path is relative to the
# current working directory). The file name must be wrapped in plain ASCII
# quotes; typographic quotes are not valid Python string delimiters.
reviews_raw = pd.read_csv("reviews.csv")
# Preview the first rows to sanity-check the load.
reviews_raw.head()
listing_id id date reviewer_id reviewer_name comments
0 7202016 38917982 2015-07-19 28943674 and cozy place. Perfect location to every…
1 7202016 39087409 2015-07-20 32440555 has a great room in a very central locat…
2 7202016 39820030 2015-07-26 37722850 Ian Very spacious apartment, and in a great neighb…
3 7202016 40813543 2015-08-02 33671805 to Seattle Center and all it has to offe…
4 7202016 41986501 2015-08-10 34959538 was a great host and very accommodating …
# Print a per-column summary (non-null count and dtype) of the raw reviews.
reviews_raw.info()
RangeIndex: 84849 entries, 0 to 84848
Data columns (total 6 columns):
# Column Non-Null Count Dtype
— —— ————– —–
0 listing_id 84849 non-null int64
1 id 84849 non-null int64
2 date 84849 non-null object
3 reviewer_id 84849 non-null int64
4 reviewer_name 84849 non-null object
5 comments 84831 non-null object
dtypes: int64(3), object(3)
memory usage: 3.9+ MB
# "listings" will be the CLEAN dataset used for the analysis
# Build the working dataset by dropping 36 of the 92 raw columns.
# `drop` returns a new DataFrame, so `listings_raw` itself is left unchanged.
# Column names must be plain ASCII-quoted strings; typographic quotes are a
# syntax error in Python.
listings = listings_raw.drop(
    columns=[
        "listing_url",
        "scrape_id",
        "last_scraped",
        "summary",
        "space",
        "description",
        "experiences_offered",
        "neighborhood_overview",
        "notes",
        "transit",
        "thumbnail_url",
        "medium_url",
        "picture_url",
        "xl_picture_url",
        "host_location",
        "host_about",
        "host_acceptance_rate",
        "host_thumbnail_url",
        "host_picture_url",
        "host_listings_count",
        "host_has_profile_pic",
        "host_neighbourhood",
        "neighbourhood",
        "city",
        "state",
        "market",
        "smart_location",
        "country_code",
        "country",
        "square_feet",
        "calendar_last_scraped",
        "requires_license",
        "license",
        "jurisdiction_names",
        "require_guest_profile_picture",
        "require_guest_phone_verification",
    ]
)
# Preview the first rows of the reduced table.
listings.head()
id name host_id host_url host_name host_since host_response_time host_response_rate host_is_superhost host_total_listings_count host_verifications host_identity_verified street neighbourhood_cleansed neighbourhood_group_cleansed zipcode latitude longitude is_location_exact property_type room_type accommodates bathrooms bedrooms beds bed_type amenities price weekly_price monthly_price security_deposit cleaning_fee guests_included extra_people minimum_nights maximum_nights calendar_updated has_availability availability_30 availability_60 availability_90 availability_365 number_of_reviews first_review last_review review_scores_rating review_scores_accuracy review_scores_cleanliness review_scores_checkin review_scores_communication review_scores_location review_scores_value instant_bookable cancellation_policy calculated_host_listings_count reviews_per_month
0 241032 Stylish Queen 956883 https://www.airbnb.com/users/show/956883 Maija 11/8/2011 within a few hours 96% f 3.0 [’email’, ‘phone’, ‘reviews’, ‘kba’] t W, Seattle, WA 98119, United States West Queen Anne 98119 47.636289 -122.371025 t Apartment Entire home/apt 4 1.0 1.0 1.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,”A… $85.00 NaN NaN NaN NaN 2 $5.00 1 365 4 weeks ago t 14 41 71 346 207 1/11/2011 2/1/2016 95.0 10.0 10.0 10.0 10.0 9.0 10.0 f moderate 2 4.07
1 953595 Bright & Airy Queen 5177328 https://www.airbnb.com/users/show/5177328 Andrea 21/2/2013 within an hour 98% t 6.0 [’email’, ‘phone’, ‘facebook’, ‘linkedin’, ‘re… t 7th Avenue West, Seattle, WA 98119, United States West Queen Anne 98119 47.639123 -122.365667 t Apartment Entire home/apt 4 1.0 1.0 1.0 Real Bed {TV,Internet,”Wireless Internet”,Kitchen,”Free… $150.00 $1,000.00 $3,000.00 $100.00 $40.00 1 $0.00 2 90 today t 13 13 16 291 43 19/8/2013 29/12/2015 96.0 10.0 10.0 10.0 10.0 10.0 10.0 f strict 6 1.48
2 3308979 House-Amazing water view 16708587 https://www.airbnb.com/users/show/16708587 Jill 12/6/2014 within a few hours 67% f 2.0 [’email’, ‘phone’, ‘google’, ‘reviews’, ‘jumio’] t West Lee Street, Seattle, WA 98119, United States West Queen Anne 98119 47.629724 -122.369483 t House Entire home/apt 11 4.5 5.0 7.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,”A… $975.00 NaN NaN $1,000.00 $300.00 10 $25.00 4 30 5 weeks ago t 1 6 17 220 20 30/7/2014 3/9/2015 97.0 10.0 10.0 10.0 10.0 10.0 10.0 f strict 2 1.15
3 7421966 Queen Anne Chateau 9851441 https://www.airbnb.com/users/show/9851441 Emily 6/11/2013 NaN NaN f 1.0 [’email’, ‘phone’, ‘facebook’, ‘reviews’, ‘jum… t 8th Avenue West, Seattle, WA 98119, United States West Queen Anne 98119 47.638473 -122.369279 t Apartment Entire home/apt 3 1.0 0.0 2.0 Real Bed {Internet,”Wireless Internet”,Kitchen,”Indoor … $100.00 $650.00 $2,300.00 NaN NaN 1 $0.00 1 1125 6 months ago t 0 0 0 143 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN f flexible 1 NaN
4 278830 Charming craftsman 3 bdm house 1452570 https://www.airbnb.com/users/show/1452570 Emily 29/11/2011 within an hour 100% f 2.0 [’email’, ‘phone’, ‘facebook’, ‘reviews’, ‘kba’] t 14th Ave W, Seattle, WA 98119, United States West Queen Anne 98119 47.632918 -122.372471 t House Entire home/apt 6 2.0 3.0 3.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,Ki… $450.00 NaN NaN $700.00 $125.00 6 $15.00 1 1125 7 weeks ago t 30 60 90 365 38 10/7/2012 24/10/
# (Third-party watermark text removed here; it was not part of the notebook.)