import matplotlib #Done for graphs
import matplotlib.pyplot as plt
import numpy as np #Is more of a low level language
import pandas as pd #Works better for Time series (It is a high level)
import scipy
# Show figures inline when running under IPython/Jupyter. `get_ipython` only
# exists there, so a plain `python script.py` run simply skips this step
# instead of failing on notebook-only `%magic` syntax.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Make the graphs a bit prettier, and bigger.
# The pandas 'display.mpl_style' option was deprecated and later removed in
# favour of matplotlib style sheets, so the style is selected through
# matplotlib directly.
plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = (15, 5)
# Locations of the yearly ticket ("multas") CSV exports.
# The URLs must be string literals, and `dl=1` asks Dropbox for the raw file;
# with `dl=0` the link serves an HTML preview page rather than the CSV.
M2014 = "https://www.dropbox.com/s/9d6bpp00z9gcq70/multas2014.csv?dl=1"
M2013 = "https://www.dropbox.com/s/3ksnfx80jrvw299/multas2013.csv?dl=1"

# Load each year into its own DataFrame.
data_2014 = pd.read_csv(M2014)
data_2013 = pd.read_csv(M2013)
# 2013: per the original author's note the "Fecha" values are inconsistent and
# only the date part is wanted, so keep the first 10 characters of each VALUE
# (`.str[:10]`). The previous `"Fecha"[0:10]` sliced the column name itself,
# which is a no-op. `exact=False` lets the day/month/year pattern match
# anywhere in the remaining text (e.g. non-zero-padded dates followed by the
# start of a time).
data_2013["Fecha"] = pd.to_datetime(
    data_2013["Fecha"].str[:10],
    format="%d/%m/%Y",
    exact=False,
    errors="coerce",
)

# 2014: full "day/month/year hour:minute" timestamps.
# errors="coerce" turns values that do not match the format into NaT instead
# of raising, so bad rows are ignored by the date-based analysis further down.
data_2014["Fecha"] = pd.to_datetime(
    data_2014["Fecha"],
    format="%d/%m/%Y %H:%M",
    errors="coerce",
)
# Tickets in 2014 by day of week, hour of day and month.
# Printed explicitly: as bare expressions the results are discarded when the
# file runs as a script (a notebook only echoes the last one in a cell).
print(data_2014["Fecha"].dt.dayofweek.value_counts())
print(data_2014["Fecha"].dt.hour.value_counts())
print(data_2014["Fecha"].dt.month.value_counts())

# Histogram of 2014 tickets per day of week (0 = Monday ... 6 = Sunday).
# A dedicated figure keeps later plots from being drawn on top of this one.
_, day_ax = plt.subplots()
data_2014["Fecha"].dt.dayofweek.plot(kind="hist", bins=7, rwidth=.8, ax=day_ax)
day_ax.set_title("Tickets By Day")
day_ax.set_xlabel("Value")
day_ax.set_ylabel("Frequency")
# Apply the intended limits [xmin, xmax, ymin, ymax]. The previous
# `axis = [...]` only bound a list to a name and never touched the plot.
day_ax.axis([0, 7, 0, 45000])
# Ordinance counts per month of 2014. Both inputs must come from the same
# DataFrame: crossing 2014 months with the 2013 "Ordenanza" column would pair
# unrelated rows that merely share an index label.
print(pd.crosstab(data_2014["Fecha"].dt.month, data_2014["Ordenanza"]))

# Most frequent articles among tickets issued in November and December 2014.
# Rows whose date failed to parse (NaT) compare False and are excluded.
is_nov_dec_2014 = data_2014["Fecha"].dt.month > 10
print(data_2014.loc[is_nov_dec_2014, "Articulo"].value_counts())

# Distinct ordinances present in the 2014 data.
print(data_2014["Ordenanza"].unique())
# Tickets per month, 2014 (blue) with 2013 (translucent red) overlaid.
# A dedicated figure keeps this from being drawn on top of an earlier plot.
_, month_ax = plt.subplots()

# Both years share one explicit bin range (12 bins over months 1..12) so their
# bars line up even if one year has no tickets in January or December.
# Without it each series derives bin edges from its own min/max.
month_hist = {"kind": "hist", "bins": 12, "range": (1, 12), "rwidth": .8}
data_2014["Fecha"].dt.month.plot(color='b', label='2014', ax=month_ax, **month_hist)
data_2013["Fecha"].dt.month.plot(color='r', alpha=0.3, label='2013', ax=month_ax, **month_hist)

month_ax.set_title("Tickets By Month")
month_ax.set_xlabel("Value")
month_ax.set_ylabel("Frequency")
# Apply the intended limits [xmin, xmax, ymin, ymax]. The previous
# `axis = [...]` only bound a list to a name and never touched the plot.
month_ax.axis([0, 12, 0, 45000])
month_ax.legend()