Serv NYC Project
Serv NYC Project
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
In [6]:
In [8]:
serv_nyc.head()
Out[8]:
5 rows × 53 columns
In [9]:
serv_nyc.tail()
Out[9]:
5 rows × 53 columns
In [10]:
# understanding patterns of the dataset
serv_nyc
Out[10]:
... ... ... ... ... ... ... ... ... ...
In [11]:
# getting the information
serv_nyc.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300698 entries, 0 to 300697
Data columns (total 53 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Unique Key 300698 non-null int64
0 Unique Key 300698 non-null int64
1 Created Date 300698 non-null object
2 Closed Date 298534 non-null object
3 Agency 300698 non-null object
4 Agency Name 300698 non-null object
5 Complaint Type 300698 non-null object
6 Descriptor 294784 non-null object
7 Location Type 300567 non-null object
8 Incident Zip 298083 non-null float64
9 Incident Address 256288 non-null object
10 Street Name 256288 non-null object
11 Cross Street 1 251419 non-null object
12 Cross Street 2 250919 non-null object
13 Intersection Street 1 43858 non-null object
14 Intersection Street 2 43362 non-null object
15 Address Type 297883 non-null object
16 City 298084 non-null object
17 Landmark 349 non-null object
18 Facility Type 298527 non-null object
19 Status 300698 non-null object
20 Due Date 300695 non-null object
21 Resolution Description 300698 non-null object
22 Resolution Action Updated Date 298511 non-null object
23 Community Board 300698 non-null object
24 Borough 300698 non-null object
25 X Coordinate (State Plane) 297158 non-null float64
26 Y Coordinate (State Plane) 297158 non-null float64
27 Park Facility Name 300698 non-null object
28 Park Borough 300698 non-null object
29 School Name 300698 non-null object
30 School Number 300698 non-null object
31 School Region 300697 non-null object
32 School Code 300697 non-null object
33 School Phone Number 300698 non-null object
34 School Address 300698 non-null object
35 School City 300698 non-null object
36 School State 300698 non-null object
37 School Zip 300697 non-null object
38 School Not Found 300698 non-null object
39 School or Citywide Complaint 0 non-null float64
40 Vehicle Type 0 non-null float64
41 Taxi Company Borough 0 non-null float64
42 Taxi Pick Up Location 0 non-null float64
43 Bridge Highway Name 243 non-null object
44 Bridge Highway Direction 243 non-null object
45 Road Ramp 213 non-null object
46 Bridge Highway Segment 213 non-null object
47 Garage Lot Name 0 non-null float64
48 Ferry Direction 1 non-null object
49 Ferry Terminal Name 2 non-null object
50 Latitude 297158 non-null float64
51 Longitude 297158 non-null float64
52 Location 297158 non-null object
dtypes: float64(10), int64(1), object(42)
memory usage: 121.6+ MB
In [12]:
Sch
Unique Created Closed Agency Incident Incident
Agency Complaint Type Descriptor Location Type ...
Key Date Date Name Zip Address
Fo
5 rows × 48 columns
In [14]:
# understanding the dataset getting mean,standard deviation,percentile, min, max value
serv_nyc1.describe()
Out[14]:
Unique Key Incident Zip X Coordinate (State Plane) Y Coordinate (State Plane) Latitude Longitude
In [15]:
serv_nyc1.index.values
Out[15]:
array([ 0, 1, 2, ..., 300695, 300696, 300697], dtype=int64)
In [16]:
serv_nyc1.columns.values
Out[16]:
array(['Unique Key', 'Created Date', 'Closed Date', 'Agency',
'Agency Name', 'Complaint Type', 'Descriptor', 'Location Type',
'Incident Zip', 'Incident Address', 'Street Name',
'Cross Street 1', 'Cross Street 2', 'Intersection Street 1',
'Intersection Street 2', 'Address Type', 'City', 'Landmark',
'Facility Type', 'Status', 'Due Date', 'Resolution Description',
'Resolution Action Updated Date', 'Community Board', 'Borough',
'X Coordinate (State Plane)', 'Y Coordinate (State Plane)',
'Park Facility Name', 'Park Borough', 'School Name',
'School Number', 'School Region', 'School Code',
'School Phone Number', 'School Address', 'School City',
'School State', 'School Zip', 'School Not Found',
'Bridge Highway Name', 'Bridge Highway Direction', 'Road Ramp',
'Bridge Highway Segment', 'Ferry Direction', 'Ferry Terminal Name',
'Latitude', 'Longitude', 'Location'], dtype=object)
In [17]:
serv_nyc1.info
Out[17]:
<bound method DataFrame.info of Unique Key Created Date C
losed Date Agency \
0 32310363 12/31/2015 11:59:45 PM 01-01-16 0:55 NYPD
1 32309934 12/31/2015 11:59:44 PM 01-01-16 1:26 NYPD
2 32309159 12/31/2015 11:59:29 PM 01-01-16 4:51 NYPD
3 32305098 12/31/2015 11:57:46 PM 01-01-16 7:43 NYPD
4 32306529 12/31/2015 11:56:58 PM 01-01-16 3:24 NYPD
... ... ... ... ...
300693 30281872 03/29/2015 12:33:41 AM NaN NYPD
300694 30281230 03/29/2015 12:33:28 AM 03/29/2015 02:33:59 AM NYPD
300695 30283424 03/29/2015 12:33:03 AM 03/29/2015 03:40:20 AM NYPD
300696 30280004 03/29/2015 12:33:02 AM 03/29/2015 04:38:35 AM NYPD
300697 30281825 03/29/2015 12:33:01 AM 03/29/2015 04:41:50 AM NYPD
Location
0 (40.86568153633767, -73.92350095571744)
1 (40.775945312321085, -73.91509393898605)
2 (40.870324522111424, -73.88852464418646)
3 (40.83599404683083, -73.82837939584206)
4 (40.733059618956815, -73.87416975810375)
... ...
300693 NaN
300694 (40.69407728322387, -73.8460866160573)
300695 (40.69959035300927, -73.94423377144169)
300696 (40.8377075854206, -73.83458731019586)
300697 (40.76058322950115, -73.98592204392392)
In [18]:
#Display the complaint type and city together
serv_nyc1.loc[:,['Complaint Type','City']]
Out[18]:
Noise -
0 NEW YORK
Street/Sidewalk
In [19]:
#Find major complaint types
major=serv_nyc1.loc[:,"Complaint Type"]
major
Out[19]:
0 Noise - Street/Sidewalk
1 Blocked Driveway
2 Blocked Driveway
3 Illegal Parking
4 Illegal Parking
...
300693 Noise - Commercial
300694 Blocked Driveway
300695 Noise - Commercial
300695 Noise - Commercial
300696 Noise - Commercial
300697 Noise - Commercial
Name: Complaint Type, Length: 300698, dtype: object
In [20]:
major.unique()
Out[20]:
array(['Noise - Street/Sidewalk', 'Blocked Driveway', 'Illegal Parking',
'Derelict Vehicle', 'Noise - Commercial',
'Noise - House of Worship', 'Posting Advertisement',
'Noise - Vehicle', 'Animal Abuse', 'Vending', 'Traffic',
'Drinking', 'Bike/Roller/Skate Chronic', 'Panhandling',
'Noise - Park', 'Homeless Encampment', 'Urinating in Public',
'Graffiti', 'Disorderly Youth', 'Illegal Fireworks',
'Ferry Complaint', 'Agency Issues', 'Squeegee', 'Animal in a Park'],
dtype=object)
In [21]:
major.nunique()
Out[21]:
24
In [22]:
#major complaints are Blocked Driveway
top=major.value_counts()
top
Out[22]:
Blocked Driveway 77044
Illegal Parking 75361
Noise - Street/Sidewalk 48612
Noise - Commercial 35577
Derelict Vehicle 17718
Noise - Vehicle 17083
Animal Abuse 7778
Traffic 4498
Homeless Encampment 4416
Noise - Park 4042
Vending 3802
Drinking 1280
Noise - House of Worship 931
Posting Advertisement 650
Urinating in Public 592
Bike/Roller/Skate Chronic 427
Panhandling 307
Disorderly Youth 286
Illegal Fireworks 168
Graffiti 113
Agency Issues 6
Squeegee 4
Ferry Complaint 2
Animal in a Park 1
Name: Complaint Type, dtype: int64
In [23]:
# Find the top 10 complaint types
top.head(10)
Out[23]:
Blocked Driveway 77044
Illegal Parking 75361
Noise - Street/Sidewalk 48612
Noise - Commercial 35577
Noise - Commercial 35577
Derelict Vehicle 17718
Noise - Vehicle 17083
Animal Abuse 7778
Traffic 4498
Homeless Encampment 4416
Noise - Park 4042
Name: Complaint Type, dtype: int64
In [25]:
# Plot a bar graph of count vs. complaint types
In [26]:
major.value_counts().plot(kind='bar',title='count vs. complaint types')
Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x265821e39d0>
In [27]:
#Visualize the complaint types
top.plot(kind='hist',title='Visualize the complaint types')
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x26582960d30>
In [28]:
#Display the major complaint types and their count
top.head(10).plot(kind='bar',title='The major complaint types and their count')
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x26582a3eb20>
In [ ]: