You can find details about how I obtained the NYC Airbnb dataset here
Let’s set some global options
# Global knitr chunk defaults: figures are 6 wide with a height/width aspect
# ratio of 0.6, and are displayed at 90% of the output width.
knitr::opts_chunk$set(fig.width = 6, fig.asp = 0.6, out.width = "90%")
library(tidyverse)
library(viridis)
Let’s clean the data a bit
# Read the raw listings and tidy them up:
#   * drop the `X1` column (presumably a row index saved with the CSV - confirm),
#   * rename `neighbourhood_group` to `borough`,
#   * derive `stars` as half of `review_scores_location`.
nyc_airbnb <-
  read_csv("data/nyc_airbnb.csv") %>%
  select(-X1) %>%
  rename(borough = neighbourhood_group) %>%
  mutate(stars = review_scores_location / 2)
Now that we have our data and libraries ready, let the exploration begin!
# Number of listings for each room type, rendered as a table.
knitr::kable(count(nyc_airbnb, room_type))
| room_type | n |
|---|---|
| Entire home/apt | 26050 |
| Private room | 22822 |
| Shared room | 1169 |
# Number of listings per borough, largest count first, rendered as a table.
nyc_airbnb %>%
  count(borough, sort = TRUE) %>%
  knitr::kable()
| borough | n |
|---|---|
| Manhattan | 23052 |
| Brooklyn | 20234 |
| Queens | 5454 |
| Bronx | 960 |
| Staten Island | 341 |
# Rating summary per neighbourhood: median and mean of `stars` (missing
# ratings ignored) plus the number of listings. A neighbourhood in which no
# listing has a rating shows NA for the median and NaN for the mean.
nyc_airbnb %>%
  group_by(neighbourhood) %>%
  summarise(
    median = median(stars, na.rm = TRUE),
    mean = mean(stars, na.rm = TRUE),
    number_of_listings = n()
  ) %>%
  knitr::kable()
| neighbourhood | median | mean | number_of_listings |
|---|---|---|---|
| Allerton | 4.50 | 4.553571 | 28 |
| Arden Heights | 4.25 | 4.250000 | 2 |
| Arrochar | 5.00 | 4.718750 | 19 |
| Arverne | 5.00 | 4.700000 | 81 |
| Astoria | 5.00 | 4.789148 | 940 |
| Bath Beach | 5.00 | 4.818182 | 13 |
| Battery Park City | 5.00 | 4.918919 | 66 |
| Bay Ridge | 5.00 | 4.791262 | 133 |
| Bay Terrace | 4.50 | 4.500000 | 5 |
| Bay Terrace, Staten Island | 4.75 | 4.750000 | 3 |
| Baychester | 4.25 | 4.250000 | 6 |
| Bayside | 4.75 | 4.708333 | 45 |
| Bayswater | 4.50 | 4.416667 | 17 |
| Bedford-Stuyvesant | 4.50 | 4.534142 | 3552 |
| Belle Harbor | 5.00 | 5.000000 | 6 |
| Bellerose | 5.00 | 4.444444 | 14 |
| Belmont | 4.50 | 4.642857 | 17 |
| Bensonhurst | 4.50 | 4.448980 | 62 |
| Bergen Beach | 4.50 | 4.571429 | 12 |
| Boerum Hill | 5.00 | 4.932099 | 196 |
| Borough Park | 4.50 | 4.494444 | 124 |
| Breezy Point | 4.75 | 4.750000 | 2 |
| Briarwood | 4.50 | 4.613636 | 53 |
| Brighton Beach | 5.00 | 4.769231 | 65 |
| Bronxdale | 5.00 | 4.687500 | 18 |
| Brooklyn Heights | 5.00 | 4.961538 | 148 |
| Brownsville | 4.50 | 4.241667 | 74 |
| Bull’s Head | 5.00 | 5.000000 | 5 |
| Bushwick | 4.50 | 4.625798 | 2461 |
| Cambria Heights | 4.50 | 4.692308 | 19 |
| Canarsie | 4.50 | 4.471963 | 127 |
| Carroll Gardens | 5.00 | 4.925258 | 230 |
| Castle Hill | 4.75 | 4.750000 | 5 |
| Castleton Corners | 5.00 | 4.875000 | 6 |
| Chelsea | 5.00 | 4.934833 | 1161 |
| Chelsea, Staten Island | NA | NaN | 1 |
| Chinatown | 5.00 | 4.722570 | 394 |
| City Island | 5.00 | 5.000000 | 15 |
| Civic Center | 5.00 | 4.794872 | 67 |
| Claremont Village | 4.50 | 4.300000 | 22 |
| Clason Point | 4.50 | 4.708333 | 19 |
| Clifton | 4.50 | 4.227273 | 13 |
| Clinton Hill | 5.00 | 4.805319 | 594 |
| Co-op City | 4.50 | 4.666667 | 6 |
| Cobble Hill | 5.00 | 4.929348 | 103 |
| College Point | 4.50 | 4.676471 | 20 |
| Columbia St | 5.00 | 4.763889 | 43 |
| Concord | 5.00 | 4.735294 | 19 |
| Concourse | 4.50 | 4.475000 | 52 |
| Concourse Village | 4.50 | 4.312500 | 37 |
| Coney Island | 4.50 | 4.447368 | 22 |
| Corona | 4.50 | 4.670454 | 53 |
| Crown Heights | 4.50 | 4.585004 | 1608 |
| Cypress Hills | 4.50 | 4.395238 | 118 |
| Ditmars Steinway | 5.00 | 4.770270 | 346 |
| Dongan Hills | 4.75 | 4.750000 | 2 |
| Douglaston | 5.00 | 4.875000 | 8 |
| Downtown Brooklyn | 5.00 | 4.896552 | 84 |
| DUMBO | 5.00 | 4.804348 | 38 |
| Dyker Heights | 4.50 | 4.638889 | 23 |
| East Elmhurst | 4.50 | 4.692000 | 152 |
| East Flatbush | 4.50 | 4.416667 | 397 |
| East Harlem | 4.50 | 4.509424 | 1191 |
| East Morrisania | 4.50 | 4.428571 | 8 |
| East New York | 4.50 | 4.267956 | 210 |
| East Village | 5.00 | 4.875550 | 2012 |
| Eastchester | 4.50 | 4.687500 | 12 |
| Edenwald | 5.00 | 4.750000 | 9 |
| Edgemere | 4.50 | 4.166667 | 9 |
| Elmhurst | 4.50 | 4.663889 | 226 |
| Eltingville | 5.00 | 5.000000 | 3 |
| Emerson Hill | 5.00 | 5.000000 | 4 |
| Far Rockaway | 4.50 | 4.500000 | 29 |
| Fieldston | 5.00 | 4.916667 | 13 |
| Financial District | 5.00 | 4.910405 | 604 |
| Flatbush | 4.50 | 4.633745 | 646 |
| Flatiron District | 5.00 | 4.981013 | 107 |
| Flatlands | 4.50 | 4.418182 | 70 |
| Flushing | 4.50 | 4.634561 | 422 |
| Fordham | 4.50 | 4.214286 | 35 |
| Forest Hills | 5.00 | 4.754310 | 148 |
| Fort Greene | 5.00 | 4.860955 | 451 |
| Fort Hamilton | 5.00 | 4.822222 | 57 |
| Fort Wadsworth | NA | NaN | 1 |
| Fresh Meadows | 4.50 | 4.673913 | 27 |
| Gerritsen Beach | 4.75 | 4.750000 | 3 |
| Glen Oaks | 4.50 | 4.500000 | 1 |
| Glendale | 4.50 | 4.428571 | 47 |
| Gowanus | 5.00 | 4.774510 | 245 |
| Gramercy | 5.00 | 4.923701 | 408 |
| Graniteville | 4.50 | 4.333333 | 5 |
| Grant City | 5.00 | 4.833333 | 3 |
| Gravesend | 4.50 | 4.662500 | 56 |
| Great Kills | 5.00 | 4.666667 | 9 |
| Greenpoint | 5.00 | 4.795073 | 1157 |
| Greenwich Village | 5.00 | 4.974359 | 412 |
| Grymes Hill | 5.00 | 4.833333 | 4 |
| Harlem | 4.50 | 4.631371 | 2807 |
| Hell’s Kitchen | 5.00 | 4.907673 | 2087 |
| Highbridge | 4.50 | 4.483333 | 32 |
| Hollis | 5.00 | 4.444444 | 13 |
| Holliswood | 4.50 | 4.500000 | 2 |
| Howard Beach | 5.00 | 4.818182 | 16 |
| Howland Hook | 4.50 | 4.500000 | 1 |
| Huguenot | 5.00 | 5.000000 | 3 |
| Hunts Point | 4.00 | 4.227273 | 14 |
| Inwood | 4.50 | 4.548781 | 266 |
| Jackson Heights | 5.00 | 4.744828 | 186 |
| Jamaica | 4.50 | 4.547771 | 191 |
| Jamaica Estates | 5.00 | 4.791667 | 21 |
| Jamaica Hills | 5.00 | 5.000000 | 3 |
| Kensington | 4.50 | 4.628906 | 171 |
| Kew Gardens | 5.00 | 4.807692 | 43 |
| Kew Gardens Hills | 4.50 | 4.625000 | 17 |
| Kingsbridge | 4.50 | 4.575472 | 66 |
| Kips Bay | 5.00 | 4.834232 | 511 |
| Laurelton | 4.50 | 4.583333 | 15 |
| Lighthouse Hill | 4.75 | 4.750000 | 3 |
| Little Italy | 5.00 | 4.842857 | 141 |
| Little Neck | 5.00 | 5.000000 | 2 |
| Long Island City | 5.00 | 4.748307 | 546 |
| Longwood | 4.50 | 4.473684 | 50 |
| Lower East Side | 5.00 | 4.819608 | 981 |
| Manhattan Beach | 4.50 | 4.571429 | 9 |
| Marble Hill | 5.00 | 4.750000 | 12 |
| Mariners Harbor | 4.25 | 4.250000 | 8 |
| Maspeth | 4.50 | 4.493506 | 97 |
| Melrose | 4.00 | 4.125000 | 6 |
| Middle Village | 4.50 | 4.692308 | 28 |
| Midland Beach | 5.00 | 4.700000 | 6 |
| Midtown | 5.00 | 4.919551 | 1696 |
| Midwood | 4.50 | 4.628049 | 121 |
| Mill Basin | 4.50 | 4.500000 | 6 |
| Morningside Heights | 5.00 | 4.829670 | 398 |
| Morris Heights | 4.50 | 4.500000 | 23 |
| Morris Park | 5.00 | 4.909091 | 16 |
| Morrisania | 4.50 | 4.583333 | 13 |
| Mott Haven | 4.50 | 4.409091 | 51 |
| Mount Eden | 4.50 | 4.555556 | 10 |
| Mount Hope | 4.50 | 4.571429 | 14 |
| Murray Hill | 5.00 | 4.834936 | 488 |
| Navy Yard | 4.50 | 4.681818 | 13 |
| Neponsit | 5.00 | 5.000000 | 2 |
| New Brighton | 4.50 | 4.333333 | 4 |
| New Dorp | 5.00 | 5.000000 | 2 |
| New Dorp Beach | 4.50 | 4.500000 | 4 |
| New Springville | 5.00 | 4.900000 | 8 |
| NoHo | 5.00 | 4.976923 | 81 |
| Nolita | 5.00 | 4.952586 | 339 |
| North Riverdale | 4.75 | 4.625000 | 10 |
| Norwood | 4.50 | 4.520000 | 37 |
| Oakwood | 5.00 | 5.000000 | 2 |
| Olinville | 5.00 | 5.000000 | 3 |
| Ozone Park | 4.50 | 4.557143 | 49 |
| Park Slope | 5.00 | 4.956221 | 544 |
| Parkchester | 4.50 | 4.637931 | 33 |
| Pelham Bay | 4.50 | 4.346154 | 15 |
| Pelham Gardens | 4.75 | 4.750000 | 25 |
| Pleasant Plains | NA | NaN | 1 |
| Port Morris | 4.50 | 4.357143 | 38 |
| Port Richmond | 4.50 | 4.357143 | 10 |
| Prince’s Bay | 5.00 | 5.000000 | 3 |
| Prospect Heights | 5.00 | 4.920128 | 371 |
| Prospect-Lefferts Gardens | 4.50 | 4.612766 | 574 |
| Queens Village | 4.50 | 4.583333 | 45 |
| Randall Manor | 4.50 | 4.500000 | 18 |
| Red Hook | 4.50 | 4.663934 | 72 |
| Rego Park | 5.00 | 4.641975 | 96 |
| Richmond Hill | 4.50 | 4.557971 | 79 |
| Richmondtown | 5.00 | 5.000000 | 1 |
| Ridgewood | 4.50 | 4.583612 | 413 |
| Riverdale | 5.00 | 4.833333 | 15 |
| Rockaway Beach | 5.00 | 4.738095 | 70 |
| Roosevelt Island | 5.00 | 4.692983 | 83 |
| Rosebank | 4.50 | 4.500000 | 4 |
| Rosedale | 4.50 | 4.413793 | 37 |
| Rossville | 5.00 | 5.000000 | 1 |
| Schuylerville | 4.50 | 4.600000 | 7 |
| Sea Gate | 5.00 | 4.666667 | 5 |
| Sheepshead Bay | 5.00 | 4.763441 | 154 |
| Shore Acres | 4.75 | 4.750000 | 9 |
| Silver Lake | 5.00 | 5.000000 | 2 |
| SoHo | 5.00 | 4.977032 | 380 |
| Soundview | 4.75 | 4.750000 | 9 |
| South Beach | 5.00 | 4.875000 | 5 |
| South Ozone Park | 4.50 | 4.558823 | 48 |
| South Slope | 5.00 | 4.939076 | 290 |
| Springfield Gardens | 4.50 | 4.500000 | 57 |
| Spuyten Duyvil | 4.75 | 4.625000 | 5 |
| St. Albans | 4.50 | 4.561224 | 62 |
| St. George | 5.00 | 4.743590 | 47 |
| Stapleton | 4.50 | 4.586207 | 34 |
| Stuyvesant Town | 5.00 | 4.769231 | 28 |
| Sunnyside | 5.00 | 4.785185 | 350 |
| Sunset Park | 4.50 | 4.656151 | 410 |
| Theater District | 5.00 | 4.903704 | 250 |
| Throgs Neck | 5.00 | 4.846154 | 14 |
| Todt Hill | 5.00 | 5.000000 | 1 |
| Tompkinsville | 4.50 | 4.590909 | 38 |
| Tottenville | 5.00 | 4.833333 | 4 |
| Tremont | 4.00 | 4.187500 | 10 |
| Tribeca | 5.00 | 4.954918 | 171 |
| Two Bridges | 4.50 | 4.638889 | 69 |
| Unionport | 4.50 | 4.500000 | 4 |
| University Heights | 4.50 | 4.473684 | 23 |
| Upper East Side | 5.00 | 4.875772 | 1952 |
| Upper West Side | 5.00 | 4.901019 | 2156 |
| Van Nest | 4.50 | 4.363636 | 15 |
| Vinegar Hill | 5.00 | 4.708333 | 31 |
| Wakefield | 4.50 | 4.500000 | 41 |
| Washington Heights | 4.50 | 4.585714 | 919 |
| West Brighton | 4.50 | 4.535714 | 17 |
| West Farms | 4.50 | 4.166667 | 5 |
| West Village | 5.00 | 4.974097 | 815 |
| Westchester Square | 4.50 | 4.583333 | 7 |
| Westerleigh | 5.00 | 5.000000 | 3 |
| Whitestone | 5.00 | 4.857143 | 12 |
| Williamsbridge | 4.50 | 4.465517 | 39 |
| Williamsburg | 5.00 | 4.856574 | 4168 |
| Willowbrook | 4.50 | 4.500000 | 1 |
| Windsor Terrace | 5.00 | 4.855634 | 173 |
| Woodhaven | 4.50 | 4.416667 | 73 |
| Woodlawn | 4.75 | 4.750000 | 8 |
| Woodrow | 4.50 | 4.500000 | 2 |
| Woodside | 5.00 | 4.708333 | 211 |
# Rating summary per room type: median and mean of `stars` (missing ratings
# ignored) together with the number of listings of that type.
summarise(
  group_by(nyc_airbnb, room_type),
  median_stars = median(stars, na.rm = TRUE),
  average_stars = mean(stars, na.rm = TRUE),
  number_of_listings = n()
)
## # A tibble: 3 x 4
## room_type median_stars average_stars number_of_listings
## <chr> <dbl> <dbl> <int>
## 1 Entire home/apt 5 4.78 26050
## 2 Private room 5 4.71 22822
## 3 Shared room 5 4.68 1169
# Histogram of star ratings, one panel per room type, with bars filled by
# borough.
#
# The borough aesthetic has to live inside aes(): an argument passed directly
# to ggplot() is swallowed by `...` and never evaluated, so nothing was being
# coloured. The column is also called `borough` here (it was renamed during
# cleaning), not `boro`. `fill` is used rather than `color` because `color`
# only affects the bar outlines of a histogram.
ggplot(nyc_airbnb, aes(x = stars, fill = borough)) +
  geom_histogram() +
  facet_grid(~room_type)

# Mean star rating (missing ratings ignored) for every borough / room-type
# pair, reshaped so each borough is one row and each room type one column.
nyc_airbnb %>%
  group_by(borough, room_type) %>%
  summarize(mean_stars = mean(stars, na.rm = TRUE)) %>%
  spread(room_type, mean_stars)
## # A tibble: 5 x 4
## # Groups: borough [5]
## borough `Entire home/apt` `Private room` `Shared room`
## <chr> <dbl> <dbl> <dbl>
## 1 Bronx 4.59 4.50 4.35
## 2 Brooklyn 4.73 4.67 4.61
## 3 Manhattan 4.84 4.79 4.80
## 4 Queens 4.71 4.68 4.60
## 5 Staten Island 4.69 4.62 5
# Scatter of Manhattan listings priced below 1000, positioned by
# longitude/latitude and coloured by price on the viridis scale, with one
# panel per room type. Points are mostly transparent so dense areas stand out.
nyc_airbnb %>%
  filter(borough == "Manhattan" & price < 1000) %>%
  ggplot(aes(x = longitude, y = latitude, color = price)) +
  geom_point(alpha = 0.2) +
  facet_grid(. ~ room_type) +
  coord_cartesian() +
  scale_color_viridis()

Entire homes are more expensive than other room types, and are more clustered in southern Manhattan.
# Manhattan neighbourhoods with the five highest median prices (ties at the
# cut-off are all kept), listed from most to least expensive, alongside
# their mean price.
nyc_airbnb %>%
  filter(borough == "Manhattan") %>%
  select(neighbourhood, price) %>%
  group_by(neighbourhood) %>%
  summarise(
    mean_price = mean(price, na.rm = TRUE),
    median_price = median(price, na.rm = TRUE)
  ) %>%
  top_n(n = 5, wt = median_price) %>%
  arrange(desc(median_price))
## # A tibble: 5 x 3
## neighbourhood mean_price median_price
## <chr> <dbl> <dbl>
## 1 Tribeca 413. 251
## 2 NoHo 265. 245
## 3 Battery Park City 295. 225
## 4 Flatiron District 318. 218
## 5 Midtown 278. 210
# Brooklyn neighbourhoods with the five highest median prices (ties at the
# cut-off are all kept, so more than five rows can appear), listed from most
# to least expensive, alongside their mean price.
nyc_airbnb %>%
  filter(borough == "Brooklyn") %>%
  select(neighbourhood, price) %>%
  group_by(neighbourhood) %>%
  summarise(
    mean_price = mean(price, na.rm = TRUE),
    median_price = median(price, na.rm = TRUE)
  ) %>%
  top_n(n = 5, wt = median_price) %>%
  arrange(desc(median_price))
## # A tibble: 6 x 3
## neighbourhood mean_price median_price
## <chr> <dbl> <dbl>
## 1 DUMBO 190. 162.
## 2 Vinegar Hill 164. 159
## 3 Boerum Hill 180. 150
## 4 Brooklyn Heights 195. 150
## 5 Carroll Gardens 181. 150
## 6 Downtown Brooklyn 162. 150
# Manhattan neighbourhoods with the five lowest median prices (a negative
# `n` selects from the bottom; ties at the cut-off are all kept), listed
# from cheapest upwards, alongside their mean price.
nyc_airbnb %>%
  filter(borough == "Manhattan") %>%
  select(neighbourhood, price) %>%
  group_by(neighbourhood) %>%
  summarise(
    mean_price = mean(price, na.rm = TRUE),
    median_price = median(price, na.rm = TRUE)
  ) %>%
  top_n(n = -5, wt = median_price) %>%
  arrange(median_price)
## # A tibble: 6 x 3
## neighbourhood mean_price median_price
## <chr> <dbl> <dbl>
## 1 Washington Heights 89.4 70
## 2 Inwood 88.3 75
## 3 Roosevelt Island 103. 79
## 4 Marble Hill 82.2 89
## 5 Harlem 120. 90
## 6 Morningside Heights 109. 90
# Brooklyn neighbourhoods with the five lowest median prices (a negative
# `n` selects from the bottom; ties at the cut-off are all kept), listed
# from cheapest upwards, alongside their mean price.
nyc_airbnb %>%
  filter(borough == "Brooklyn") %>%
  select(neighbourhood, price) %>%
  group_by(neighbourhood) %>%
  summarise(
    mean_price = mean(price, na.rm = TRUE),
    median_price = median(price, na.rm = TRUE)
  ) %>%
  top_n(n = -5, wt = median_price) %>%
  arrange(median_price)
## # A tibble: 6 x 3
## neighbourhood mean_price median_price
## <chr> <dbl> <dbl>
## 1 Gerritsen Beach 74 50
## 2 Borough Park 69.7 55
## 3 Gravesend 81.3 61
## 4 Cypress Hills 80.2 61.5
## 5 Brownsville 86.3 65
## 6 Bushwick 81.8 65