import numpy as np
# Copyright by PowCoder
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.tree import plot_tree
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import KMeans
# Raise the column display limit so wide DataFrames are not truncated.
pd.options.display.max_columns = 100000

# Load the raw listings file, then drop the columns named below.
listings_raw = pd.read_csv("listings.csv")
listings = listings_raw.drop(
    [
        "listing_url",
        "scrape_id",
        "last_scraped",
        "summary",
        "space",
        "description",
        "experiences_offered",
        "neighborhood_overview",
        "notes",
        "transit",
        "thumbnail_url",
        "medium_url",
        "picture_url",
        "xl_picture_url",
        "host_location",
        "host_about",
        "host_acceptance_rate",
        "host_thumbnail_url",
        "host_picture_url",
        "host_listings_count",
        "host_has_profile_pic",
        "host_neighbourhood",
        "neighbourhood",
        "city",
        "state",
        "market",
        "smart_location",
        "country_code",
        "country",
        "square_feet",
        "calendar_last_scraped",
        "requires_license",
        "license",
        "jurisdiction_names",
        "require_guest_profile_picture",
        "require_guest_phone_verification",
    ],
    axis=1,
)
listings.head()
id name host_id host_url host_name host_since host_response_time host_response_rate host_is_superhost host_total_listings_count host_verifications host_identity_verified street neighbourhood_cleansed neighbourhood_group_cleansed zipcode latitude longitude is_location_exact property_type room_type accommodates bathrooms bedrooms beds bed_type amenities price weekly_price monthly_price security_deposit cleaning_fee guests_included extra_people minimum_nights maximum_nights calendar_updated has_availability availability_30 availability_60 availability_90 availability_365 number_of_reviews first_review last_review review_scores_rating review_scores_accuracy review_scores_cleanliness review_scores_checkin review_scores_communication review_scores_location review_scores_value instant_bookable cancellation_policy calculated_host_listings_count reviews_per_month
0 241032 Stylish Queen 956883 https://www.airbnb.com/users/show/956883 Maija 2011-08-11 within a few hours 96% f 3.0 [’email’, ‘phone’, ‘reviews’, ‘kba’] t W, Seattle, WA 98119, United States West Queen Anne 98119 47.636289 -122.371025 t Apartment Entire home/apt 4 1.0 1.0 1.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,”A… $85.00 NaN NaN NaN NaN 2 $5.00 1 365 4 weeks ago t 14 41 71 346 207 2011-11-01 2016-01-02 95.0 10.0 10.0 10.0 10.0 9.0 10.0 f moderate 2 4.07
1 953595 Bright & Airy Queen 5177328 https://www.airbnb.com/users/show/5177328 Andrea 2013-02-21 within an hour 98% t 6.0 [’email’, ‘phone’, ‘facebook’, ‘linkedin’, ‘re… t 7th Avenue West, Seattle, WA 98119, United States West Queen Anne 98119 47.639123 -122.365666 t Apartment Entire home/apt 4 1.0 1.0 1.0 Real Bed {TV,Internet,”Wireless Internet”,Kitchen,”Free… $150.00 $1,000.00 $3,000.00 $100.00 $40.00 1 $0.00 2 90 today t 13 13 16 291 43 2013-08-19 2015-12-29 96.0 10.0 10.0 10.0 10.0 10.0 10.0 f strict 6 1.48
2 3308979 House-Amazing water view 16708587 https://www.airbnb.com/users/show/16708587 Jill 2014-06-12 within a few hours 67% f 2.0 [’email’, ‘phone’, ‘google’, ‘reviews’, ‘jumio’] t West Lee Street, Seattle, WA 98119, United States West Queen Anne 98119 47.629724 -122.369483 t House Entire home/apt 11 4.5 5.0 7.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,”A… $975.00 NaN NaN $1,000.00 $300.00 10 $25.00 4 30 5 weeks ago t 1 6 17 220 20 2014-07-30 2015-09-03 97.0 10.0 10.0 10.0 10.0 10.0 10.0 f strict 2 1.15
3 7421966 Queen Anne Chateau 9851441 https://www.airbnb.com/users/show/9851441 Emily 2013-11-06 NaN NaN f 1.0 [’email’, ‘phone’, ‘facebook’, ‘reviews’, ‘jum… t 8th Avenue West, Seattle, WA 98119, United States West Queen Anne 98119 47.638473 -122.369279 t Apartment Entire home/apt 3 1.0 0.0 2.0 Real Bed {Internet,”Wireless Internet”,Kitchen,”Indoor … $100.00 $650.00 $2,300.00 NaN NaN 1 $0.00 1 1125 6 months ago t 0 0 0 143 0 NaN NaN NaN NaN NaN NaN NaN NaN NaN f flexible 1 NaN
4 278830 Charming craftsman 3 bdm house 1452570 https://www.airbnb.com/users/show/1452570 Emily 2011-11-29 within an hour 100% f 2.0 [’email’, ‘phone’, ‘facebook’, ‘reviews’, ‘kba’] t 14th Ave W, Seattle, WA 98119, United States West Queen Anne 98119 47.632918 -122.372471 t House Entire home/apt 6 2.0 3.0 3.0 Real Bed {TV,”Cable TV”,Internet,”Wireless Internet”,Ki… $450.00 NaN NaN $700.00 $125.00 6 $15.00 1 1125 7 weeks ago t 30 60 90 365 38 2012-07-10 2015-10-24 92.0 9.0 9.0 10.0 10.0 9.0 9.0 f strict 1 0.89
# 7) Availability vs. reviews_per_month
# Single-column frame holding the response variable; .copy() keeps it
# independent of `listings`.
reviews_per_month = listings[["reviews_per_month"]].copy()
reviews_per_month.describe()
reviews_per_month
count 3191.000000
mean 2.078919
std 1.822348
min 0.020000
25% 0.695000
50% 1.540000
75% 3.000000
max 12.150000
# Box plot, histogram and violin plot of reviews_per_month, side by side.
f, axes = plt.subplots(1, 3, figsize=(20, 5))
for var in reviews_per_month:
    sb.boxplot(data=reviews_per_month[var], orient="h", ax=axes[0])
    sb.histplot(data=reviews_per_month[var], ax=axes[1])
    sb.violinplot(data=reviews_per_month[var], orient="h", ax=axes[2])

# Predictor frame: the four availability columns.
availability = listings[
    ["availability_30", "availability_60", "availability_90", "availability_365"]
].copy()
availability.describe()
availability_30 availability_60 availability_90 availability_365
count 3818.000000 3818.000000 3818.000000 3818.000000
mean 16.786276 36.814825 58.082504 244.772656
std 12.173637 23.337541 34.063845 126.772526
min 0.000000 0.000000 0.000000 0.000000
25% 2.000000 13.000000 28.000000 124.000000
50% 20.000000 46.000000 73.000000 308.000000
75% 30.000000 59.000000 89.000000 360.000000
max 30.000000 60.000000 90.000000 365.000000
# One row of plots (box, histogram, violin) per availability column.
# enumerate() supplies the row index; the original used a `count` variable
# that was never initialised.
f, axes = plt.subplots(4, 3, figsize=(40, 24))
for row, var in enumerate(availability):
    sb.boxplot(data=availability[var], orient="h", ax=axes[row, 0])
    sb.histplot(data=availability[var], ax=axes[row, 1])
    sb.violinplot(data=availability[var], orient="h", ax=axes[row, 2])

# Split the Dataset into Train and Test
X_train, X_test, y_train, y_test = train_test_split(
    availability, reviews_per_month, test_size=0.25
)

# Response and predictors of the training split, joined column-wise.
trainDF = pd.concat([y_train, X_train], axis=1).reindex(y_train.index)

# Correlation heatmap followed by pairwise scatter plots of the training data.
f = plt.figure(figsize=(12, 8))
sb.heatmap(trainDF.corr(), vmin=-1, vmax=1, annot=True, fmt=".2f")
sb.pairplot(data=trainDF)
# 8) Availability vs. price_fluctuation
# Single-column frame with the listing price, converted from text such as
# "$1,000.00" to a float.
price_fluctuation = listings[["price"]].copy()
price_fluctuation["price"] = (
    price_fluctuation["price"]
    # regex=True is explicit: the pattern is a character class, and the
    # implicit default is deprecated (see the FutureWarning in the output).
    .str.replace(r"[$,]", "", regex=True)
    .astype("float")
    # Fill missing prices after the conversion; filling with 0.0 before the
    # .str call had no effect because .str returns NaN for non-strings.
    .fillna(0.0)
)
price_fluctuation.describe()
C:\Users\ \AppData\Local\Temp\ipykernel_2552\2005145843.py:2: FutureWarning: The default value of regex will change from True to False in a future version.
price_fluctuation[‘price’] = price_fluctuation[‘price’].fillna(0.0).str.replace(‘[$,]’, ”).astype(‘float’)
count 3818.000000
mean 127.976166
std 90.250022
min 20.000000
25% 75.000000
50% 100.000000
75% 150.000000
max 1000.000000
# Box plot, histogram and violin plot of price, side by side.
f, axes = plt.subplots(1, 3, figsize=(20, 5))
for var in price_fluctuation:
    sb.boxplot(data=price_fluctuation[var], orient="h", ax=axes[0])
    sb.histplot(data=price_fluctuation[var], ax=axes[1])
    sb.violinplot(data=price_fluctuation[var], orient="h", ax=axes[2])

# Predictor frame: the four availability columns.
availability = listings[
    ["availability_30", "availability_60", "availability_90", "availability_365"]
].copy()
availability.describe()
availability_30 availability_60 availability_90 availability_365
count 3818.000000 3818.000000 3818.000000 3818.000000
mean 16.786276 36.814825 58.082504 244.772656
std 12.173637 23.337541 34.063845 126.772526
min 0.000000 0.000000 0.000000 0.000000
25% 2.000000 13.000000 28.000000 124.000000
50% 20.000000 46.000000 73.000000 308.000000
75% 30.000000 59.000000 89.000000 360.000000
max 30.000000 60.000000 90.000000 365.000000
# One row of plots (box, histogram, violin) per availability column.
# enumerate() supplies the row index; the original used a `count` variable
# that was never initialised.
f, axes = plt.subplots(4, 3, figsize=(40, 24))
for row, var in enumerate(availability):
    sb.boxplot(data=availability[var], orient="h", ax=axes[row, 0])
    sb.histplot(data=availability[var], ax=axes[row, 1])
    sb.violinplot(data=availability[var], orient="h", ax=axes[row, 2])

# Split the Dataset into Train and Test
X2_train, X2_test, y2_train, y2_test = train_test_split(
    availability, price_fluctuation, test_size=0.25
)

# Response and predictors of the training split, joined column-wise.
trainDF2 = pd.concat([y2_train, X2_train], axis=1).reindex(y2_train.index)

# Correlation heatmap followed by pairwise scatter plots of the training data.
f = plt.figure(figsize=(12, 8))
sb.heatmap(trainDF2.corr(), vmin=-1, vmax=1, annot=True, fmt=".2f")
sb.pairplot(data=trainDF2)
# 11) number_of_amenities vs. review_scores_value
# Single-column frame holding the response variable; .copy() keeps it
# independent of `listings`.
review_score_value = listings[["review_scores_value"]].copy()
review_score_value.describe()
review_scores_value
count 3162.000000
mean 9.452245
std 0.750259
min 2.000000
25% 9.000000
50% 10.000000
75% 10.000000
max 10.000000
# Box plot, histogram and violin plot of review_scores_value, side by side.
f, axes = plt.subplots(1, 3, figsize=(20, 5))
for var in review_score_value:
    sb.boxplot(data=review_score_value[var], orient="h", ax=axes[0])
    sb.histplot(data=review_score_value[var], ax=axes[1])
    sb.violinplot(data=review_score_value[var], orient="h", ax=axes[2])

# Replace the amenities text with a count: number of commas plus one.
# NOTE(review): an entry with no commas (including an empty "{}") counts as 1;
# confirm whether empty sets should be 0.
number_of_amenities = listings[["amenities"]].copy()
number_of_amenities["amenities"] = (
    # Fill missing entries after counting; filling with 0.0 before the .str
    # call had no effect because .str returns NaN for non-strings.
    (number_of_amenities["amenities"].str.count(",") + 1).fillna(0)
)
number_of_amenities.describe()
count 3818.000000
mean 14.370613
std 4.706825
min 1.000000
25% 11.000000
50% 14.000000
75% 17.000000
max 30.000000
# Box plot, histogram and violin plot of the amenity count, side by side.
f, axes = plt.subplots(1, 3, figsize=(20, 5))
for var in number_of_amenities:
    sb.boxplot(data=number_of_amenities[var], orient="h", ax=axes[0])
    sb.histplot(data=number_of_amenities[var], ax=axes[1])
    sb.violinplot(data=number_of_amenities[var], orient="h", ax=axes[2])

# Split the Dataset into Train and Test
X3_train, X3_test, y3_train, y3_test = train_test_split(
    number_of_amenities, review_score_value, test_size=0.25
)

# Response and predictor of the training split, joined column-wise.
# Reindex on this split's own index (y3_train); the original used y_train,
# which comes from a different, independently shuffled split.
trainDF3 = pd.concat([y3_train, X3_train], axis=1).reindex(y3_train.index)

# Correlation heatmap followed by pairwise scatter plots of the training data.
f = plt.figure(figsize=(12, 8))
sb.heatmap(trainDF3.corr(), vmin=-1, vmax=1, annot=True, fmt=".2f")
sb.pairplot(data=trainDF3)
程序代写 CS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com