# Colab-only setup: mount Google Drive at /content/drive so the dataset CSV
# read further down is reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns #advanced visualization library
import warnings
# NOTE: this silences every warning category for the rest of the session,
# including deprecation notices emitted by pandas / seaborn / matplotlib.
warnings.filterwarnings('ignore')
import pandas as pd
# Load the raw congestion-by-region records from the mounted Drive folder.
csv_path = '/content/drive/My Drive/Chicago_Traffic_Tracker_-_Historical_Congestion_Estimates_by_Region_-_2018-Current(1).csv'
data = pd.read_csv(csv_path)
# Notebook preview of the first rows (no effect when run as a plain script).
data.head()
TIME : Timestamp of the record
REGION_ID : Unique arbitrary number to represent each region
SPEED : Estimated congestion level. Although expressed in miles per hour, this value is more a reflection of the congestion level in the region than it is indicative of the average raw speed vehicles are travelling within the region.
REGION : Name of the region.
BUS_COUNT : The number of buses used to estimate traffic.
NUM_READS : Number of GPS probes received (or used) for estimating the speed for that segment.
WEST: Approximate longitude of the west edge of the region.
EAST: Approximate longitude of the east edge of the region.
SOUTH : Approximate latitude of the south edge of the region.
NORTH : Approximate latitude of the north edge of the region.
NW_LOCATION : The location corresponding to the intersection of NORTH and WEST in a format that allows for creation of maps and other geographic operations on this data portal.
SE_LOCATION : The location corresponding to the intersection of SOUTH and EAST in a format that allows for creation of maps and other geographic operations on this data portal.
# Parse the textual TIME column (12-hour clock with AM/PM) into real
# timestamps, then derive the calendar fields used by the later grouping.
data["TIME"] = pd.to_datetime(data["TIME"], format="%m/%d/%Y %I:%M:%S %p")
for column_name, dt_field in (
    ("dayofweek", "dayofweek"),
    ("DAY", "day"),
    ("MONTH", "month"),
    ("YEAR", "year"),
):
    # Each field is read from the .dt accessor of the parsed TIME column.
    data[column_name] = getattr(data["TIME"].dt, dt_field)
# Region names ordered by REGION_ID, one entry per distinct ID.
# For each ID the name taken is the one on its first row in `data`.
# The IDs come from the data itself rather than a hard-coded range(29), so a
# missing or additional region no longer raises IndexError or gets skipped.
list_REGION = (
    data[['REGION_ID', 'REGION']]
    .drop_duplicates('REGION_ID')
    .sort_values('REGION_ID')['REGION']
    .tolist()
)
# Collapse the raw records to one row per region and hour by averaging the
# measurements; the grouping keys carry the region bounds through unchanged.
data = (
    data.groupby(
        ['REGION', 'REGION_ID', 'MONTH', 'DAY', 'YEAR', 'HOUR',
         'NORTH', 'WEST', 'EAST', 'SOUTH', 'DAY_OF_WEEK']
    )[['SPEED', 'BUS_COUNT', 'NUM_READS']]
    .mean()
    .reset_index()
)
# Kept for compatibility: the aggregated rows sit on the hour, minute "00".
data['MINUTE'] = '00'
# Assemble the hourly timestamp directly from the numeric components instead
# of joining them into strings row by row and re-parsing the result.
data['Time'] = pd.to_datetime(
    data[['YEAR', 'MONTH', 'DAY', 'HOUR']].rename(columns=str.lower)
)
# Midpoint of each region's bounding box.
data['CENTER_LAT']=data['NORTH']*0.5+0.5*data['SOUTH']
data['CENTER_LON']=data['EAST']*0.5+0.5*data['WEST']
# Replace the timestamp with a human-readable label.
# NOTE(review): "%l" (space-padded 12-hour clock) is a glibc extension; it
# works on Linux/Colab but is not portable to every platform's strftime.
data['Time'] = data.Time.dt.strftime("%a, %d %b, %Y at %l:%M %p")
# Horizontal bar chart of the mean SPEED for each region.
plt.figure(figsize=(20,10))
plt.title("Mean speed per region")
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positionals.
sns.barplot(x=data.SPEED, y=data.REGION)
from numpy import median
# Same chart as the mean plot, but each bar shows the median SPEED.
plt.figure(figsize=(20,10))
plt.title("Median speed per region")
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positionals.
sns.barplot(x=data.SPEED, y=data.REGION, estimator=median)
# Keep only the hours treated as rush hours here (8-9 and 15-17) and plot
# the median SPEED per region for that subset.
data2=data[data["HOUR"].isin([8,9,15,16,17])]
plt.figure(figsize=(20,10))
plt.title("Median speed during rush hours")
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positionals.
sns.barplot(x=data2.SPEED, y=data2.REGION, estimator=median)
from datetime import datetime

# Rows for the "Chicago Loop" region, one per distinct Time label.
loop = data[data["REGION"] == "Chicago Loop"]
loop = loop.drop_duplicates(subset="Time", keep='first')

fig, ax = plt.subplots(1, 1, figsize=(15, 15))

# Restrict to 8-14 April 2019 (both ends inclusive).
sub_set = loop[
    (loop['YEAR'] == 2019)
    & (loop['MONTH'] == 4)
    & loop['DAY'].between(8, 14)
].copy()
timelist = sub_set["Time"]

# Draw on the axes created above (it is the current axes at this point).
ax.plot(sub_set.Time, sub_set.SPEED)
ax.set_title("Speed during a week")
ax.set_ylabel("Miles per hour")
# Show every 8th time label only, rotated so the long labels stay legible.
ax.set_xticks(timelist[::8])
plt.xticks(rotation=90)