In [ ]:
library(dplyr)
library(ggplot2)
library(plotly)
# Read the rolled-up complaints extract into `a`, then print its structure
# (column names and types) for a quick sanity check.
a <- read.csv(
  file = "/home/mansiarora/Documents/DataCor_egov/Data/ddive_rolled_seq.csv"
)
str(a)
In [ ]:
# Parse Complaint.Date as a Date and derive the calendar columns used by the
# later cells. Week uses "%W" (week of the year, Monday as first day);
# Week_Year and Month_Year are "<week>_<year>" / "<month>_<year>" labels.
a <- a %>%
  mutate(
    Complaint.Date = as.Date(Complaint.Date, format = "%Y-%m-%d"),
    Month = format(Complaint.Date, "%m"),
    Year = format(Complaint.Date, "%Y"),
    Week = format(Complaint.Date, "%W"),
    Weekday = weekdays(Complaint.Date),
    Week_Year = paste0(Week, "_", Year),
    Month_Year = paste0(Month, "_", Year)
  )
In [ ]:
# Counts by complaint type
# Total complaint_count per Complaint.Type, largest first.
b <- a %>%
  group_by(Complaint.Type) %>%
  summarise(count = sum(complaint_count))
b <- b[order(-b$count), ]
# Make Complaint.Type a factor whose level order follows descending count so
# the bars are drawn from most to least frequent.
b <- transform(b, Complaint.Type = reorder(Complaint.Type, -count))
# head() instead of b[1:20, ]: indexing past the last row pads the result
# with all-NA rows, which would be drawn as a spurious "NA" bar when there
# are fewer than 20 complaint types.
ggplot(head(b, 20), aes(x = Complaint.Type, y = count)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Up to ten most frequent complaint types (a factor); later cells filter and
# title their plots with it. head() avoids NA entries when fewer than ten
# types exist.
topcomplaints <- head(unique(b$Complaint.Type), 10)
In [ ]:
# Counts by Ward
# Total complaint_count per Ward, largest first.
c <- a %>%
  group_by(Ward) %>%
  summarise(count = sum(complaint_count))
c <- c[order(-c$count), ]
# Make Ward a factor ordered by descending count so the bars are drawn from
# the busiest ward downwards.
c <- transform(c, Ward = reorder(Ward, -count))
# head() instead of c[1:20, ]: indexing past the last row pads the result
# with all-NA rows, which would be drawn as a spurious "NA" bar when there
# are fewer than 20 wards.
ggplot(head(c, 20), aes(x = Ward, y = count)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Up to ten busiest wards (a factor); later cells filter on it. head() avoids
# NA entries when fewer than ten wards exist.
topwards <- head(unique(c$Ward), 10)
topwards
In [ ]:
# Counts by complaint type and ward
# Total complaint_count for every (Complaint.Type, Ward) pair, with the
# largest totals first.
d <- summarise(
  group_by(a, Complaint.Type, Ward),
  count = sum(complaint_count)
)
d <- d[order(-d[["count"]]), ]
In [ ]:
# top 5 complaints of each ward
# For every ward in topwards, keep its five largest complaint types and plot
# their relative shares as a 100% stacked bar per ward.
#
# d is already sorted by descending count, so the first rows of each ward's
# subset are its largest complaint types.
# Changes from the earlier loop:
#   * %in% instead of ==, so an NA Ward never selects all-NA rows;
#   * head(., 5) instead of [1:5, ], so a ward with fewer than five complaint
#     types is not padded with NA rows;
#   * the per-ward pieces are collected in a list and bound once instead of
#     growing a data frame with rbind() on every iteration;
#   * the unused "others" total is dropped (it summed from row 11 although
#     only five rows were kept, and its use was commented out).
e <- bind_rows(lapply(
  as.character(topwards[!is.na(topwards)]),
  function(ward) {
    head(as.data.frame(d[d$Ward %in% ward, ]), 5)
  }
))
ggplot(e, aes(x = Ward, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill")
In [ ]:
# Counts by complaint date (month)
# Total complaint_count per calendar month (all years pooled), sorted with
# the largest month first, then drawn as a bar chart.
f <- summarise(group_by(a, Month), count = sum(complaint_count))
f <- f[order(-f[["count"]]), ]
ggplot(data = f, mapping = aes(x = Month, y = count)) +
  geom_bar(stat = "identity")
In [ ]:
# Counts by complaint date (year)
# Total complaint_count per year, sorted with the largest year first, then
# drawn as a bar chart.
g <- summarise(group_by(a, Year), count = sum(complaint_count))
g <- g[order(-g[["count"]]), ]
ggplot(data = g, mapping = aes(x = Year, y = count)) +
  geom_bar(stat = "identity")
In [ ]:
# Counts by month and year
# Total complaint_count for every (Month, Year) pair, largest first.
h <- summarise(group_by(a, Month, Year), count = sum(complaint_count))
h <- h[order(-h[["count"]]), ]
# Share of each year within every calendar month.
ggplot(data = h, mapping = aes(x = Month, y = count, fill = Year)) +
  geom_bar(stat = "identity", position = "fill")
# Share of each calendar month within every year.
ggplot(data = h, mapping = aes(x = Year, y = count, fill = Month)) +
  geom_bar(stat = "identity", position = "fill")
In [ ]:
# Trend of top complaint types over time
# Relative share of each top complaint type per year, per calendar month and
# per month-of-year, each drawn as a 100% stacked bar chart.

# Share by year.
i <- a[a$Complaint.Type %in% topcomplaints, ] %>%
  group_by(Complaint.Type, Year) %>%
  summarise(count = sum(complaint_count))
i <- i[order(-i$count), ]
ggplot(i, aes(x = Year, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill")

# Share by calendar month (all years pooled).
j <- a[a$Complaint.Type %in% topcomplaints, ] %>%
  group_by(Complaint.Type, Month) %>%
  summarise(count = sum(complaint_count))
j <- j[order(-j$count), ]
ggplot(j, aes(x = Month, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill")

# Share by month-of-year.
k <- a[a$Complaint.Type %in% topcomplaints, ] %>%
  group_by(Complaint.Type, Month_Year) %>%
  summarise(count = sum(complaint_count))
k <- k[order(-k$count), ]
# Month_Year is a "<month>_<year>" string, so the default alphabetical axis
# puts every January next to each other instead of following time. Derive the
# chronological label order from the complaint dates and pin the axis to it;
# intersect() keeps only the labels that actually occur in k.
month_year_order <- intersect(
  unique(a$Month_Year[order(a$Complaint.Date)]),
  k$Month_Year
)
ggplot(k, aes(x = Month_Year, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_x_discrete(limits = month_year_order) +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.4, vjust = 0.1))
In [ ]:
# Trend of top wards over time
# Yearly totals for the wards listed in topwards, largest first.
l <- summarise(
  group_by(a[a$Ward %in% topwards, ], Ward, Year),
  count = sum(complaint_count)
)
l <- l[order(-l[["count"]]), ]
# Use the topwards order as the factor level order for the fill legend.
l$Ward <- factor(l$Ward, levels = topwards)
# Absolute totals per year, stacked by ward.
ggplot(data = l, mapping = aes(x = Year, y = count, fill = Ward)) +
  geom_bar(stat = "identity")
# Same data as shares of each year's total.
ggplot(data = l, mapping = aes(x = Year, y = count, fill = Ward)) +
  geom_bar(stat = "identity", position = "fill")

# Calendar-month totals for the same wards, largest first.
m <- summarise(
  group_by(a[a$Ward %in% topwards, ], Ward, Month),
  count = sum(complaint_count)
)
m <- m[order(-m[["count"]]), ]
# Absolute totals per month, stacked by ward.
ggplot(data = m, mapping = aes(x = Month, y = count, fill = Ward)) +
  geom_bar(stat = "identity")
# Same data as shares of each month's total.
ggplot(data = m, mapping = aes(x = Month, y = count, fill = Ward)) +
  geom_bar(stat = "identity", position = "fill")
In [ ]:
#install.packages("gridExtra")
library("gridExtra")
# Density plot
# Rows of `a` for each of the first six entries of topcomplaints. C1..C6 are
# read again by the cells that follow.
C1 <- a[a$Complaint.Type == topcomplaints[1], ]
C2 <- a[a$Complaint.Type == topcomplaints[2], ]
C3 <- a[a$Complaint.Type == topcomplaints[3], ]
C4 <- a[a$Complaint.Type == topcomplaints[4], ]
C5 <- a[a$Complaint.Type == topcomplaints[5], ]
C6 <- a[a$Complaint.Type == topcomplaints[6], ]

# Density of the per-row complaint_count values for each subset.
D1 <- ggplot(data = C1, mapping = aes(x = complaint_count)) + geom_density()
D2 <- ggplot(data = C2, mapping = aes(x = complaint_count)) + geom_density()
D3 <- ggplot(data = C3, mapping = aes(x = complaint_count)) + geom_density()
D4 <- ggplot(data = C4, mapping = aes(x = complaint_count)) + geom_density()
D5 <- ggplot(data = C5, mapping = aes(x = complaint_count)) + geom_density()
D6 <- ggplot(data = C6, mapping = aes(x = complaint_count)) + geom_density()

# Lay the six densities out in a 3-column, 2-row grid.
grid.arrange(D1, D2, D3, D4, D5, D6, ncol = 3, nrow = 2)
In [ ]:
# Complaint Count by date
# One bar chart of daily complaint totals per top complaint type (C1..C6),
# arranged in a 3-column, 2-row grid.
#
# The plots are kept in a list rather than assigned to a..f: those names
# hold the raw data frame (`a`) and earlier summary tables, and overwriting
# them with ggplot objects meant no earlier cell could be re-run without
# reloading the data. The per-type summaries are now local to the helper.

# Sum complaint_count per Complaint.Date for one subset and draw it as bars.
#   complaints: rows of the complaints data for one complaint type
#   title:      plot title
#   title_size: font size of the title
plot_daily_counts <- function(complaints, title, title_size = 10) {
  daily <- complaints %>%
    group_by(Complaint.Date) %>%
    summarise(count = sum(complaint_count))
  ggplot(daily, aes(x = Complaint.Date, y = count)) +
    geom_bar(stat = "identity") +
    ggtitle(title) +
    theme(plot.title = element_text(size = title_size))
}

daily_plots <- Map(
  plot_daily_counts,
  list(C1, C2, C3, C4, C5, C6),
  as.character(topcomplaints[1:6]),
  # The sixth panel keeps the smaller title size it had before.
  c(10, 10, 10, 10, 10, 8)
)
do.call(grid.arrange, c(daily_plots, list(ncol = 3, nrow = 2)))
In [ ]:
# Complaint Count by Year
# One bar chart of yearly complaint totals per top complaint type (C1..C6),
# arranged in a 3-column, 2-row grid.
#
# The plots are kept in a list rather than assigned to a..f: those names
# hold the raw data frame (`a`) and earlier summary tables, and overwriting
# them with ggplot objects meant no earlier cell could be re-run without
# reloading the data. The per-type summaries are now local to the helper.

# Sum complaint_count per Year for one subset and draw it as bars.
#   complaints: rows of the complaints data for one complaint type
#   title:      plot title
plot_yearly_counts <- function(complaints, title) {
  yearly <- complaints %>%
    group_by(Year) %>%
    summarise(count = sum(complaint_count))
  ggplot(yearly, aes(x = Year, y = count)) +
    geom_bar(stat = "identity") +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

yearly_plots <- Map(
  plot_yearly_counts,
  list(C1, C2, C3, C4, C5, C6),
  as.character(topcomplaints[1:6])
)
do.call(grid.arrange, c(yearly_plots, list(ncol = 3, nrow = 2)))
In [ ]:
# Complaint Count by Week
# One line chart of complaint totals per week-of-year label (all years
# pooled) for each top complaint type (C1..C6), in a 3-column, 2-row grid.
#
# The plots are kept in a list rather than assigned to a..f: those names
# hold the raw data frame (`a`) and earlier summary tables, and overwriting
# them with ggplot objects meant no earlier cell could be re-run without
# reloading the data. The per-type summaries are now local to the helper.

# Sum complaint_count per Week for one subset and draw it as a single line.
#   complaints: rows of the complaints data for one complaint type
#   title:      plot title
plot_weekly_counts <- function(complaints, title) {
  weekly <- complaints %>%
    group_by(Week) %>%
    summarise(count = sum(complaint_count))
  ggplot(weekly, aes(x = Week, y = count)) +
    # group = 1 joins all weeks into one line even though Week is discrete.
    geom_line(group = 1) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

weekly_plots <- Map(
  plot_weekly_counts,
  list(C1, C2, C3, C4, C5, C6),
  as.character(topcomplaints[1:6])
)
do.call(grid.arrange, c(weekly_plots, list(ncol = 3, nrow = 2)))
In [ ]:
# Complaint Count by Weekday
# One line chart of complaint totals per weekday for each top complaint type
# (C1..C6), arranged in a 3-column, 2-row grid.
#
# Fixes relative to the earlier version:
#   * `levels(x$Weekday) = days` only attached a "levels" attribute to a
#     character column, so the axis stayed in alphabetical order (Friday,
#     Monday, ...). Had the column been a factor, the same assignment would
#     have renamed the alphabetical levels positionally and mislabelled the
#     days. The column is now converted with explicit levels and labels.
#   * the plots are kept in a list rather than assigned to a..f, which hold
#     the raw data frame (`a`) and earlier summary tables.

# Short axis labels, Monday first.
days <- c("Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun")
# Full weekday names in the same Monday-first order, spelled the way
# weekdays() spells them in the current locale (2018-01-01 was a Monday).
# This matches the Weekday column because that column was also produced by
# weekdays().
weekday_names <- weekdays(as.Date("2018-01-01") + 0:6)

# Sum complaint_count per Weekday for one subset and draw it as a single
# line with the weekdays in calendar order.
#   complaints: rows of the complaints data for one complaint type
#   title:      plot title
plot_weekday_counts <- function(complaints, title) {
  by_weekday <- complaints %>%
    group_by(Weekday) %>%
    summarise(count = sum(complaint_count))
  by_weekday$Weekday <- factor(
    by_weekday$Weekday,
    levels = weekday_names,
    labels = days
  )
  ggplot(by_weekday, aes(x = Weekday, y = count)) +
    # group = 1 joins all weekdays into one line on the discrete axis.
    geom_line(group = 1) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}

weekday_plots <- Map(
  plot_weekday_counts,
  list(C1, C2, C3, C4, C5, C6),
  as.character(topcomplaints[1:6])
)
do.call(grid.arrange, c(weekday_plots, list(ncol = 3, nrow = 2)))
In [ ]:
# Top wards of each complaint type
# For each top complaint type (C1..C6): rank wards by total complaint_count,
# plot the ten busiest wards, and store their names in topwards_C1..C6 for
# the next cell.
#
# Changes from the earlier version:
#   * the six copy-pasted stanzas are replaced by two helpers;
#   * head(., 10) replaces [1:10, ], which padded the result with NA rows
#     (an "NA" bar and NA entries in topwards_Cx) when a complaint type
#     occurs in fewer than ten wards;
#   * the plots are kept in a list rather than assigned to a..f, which hold
#     the raw data frame (`a`) and earlier summary tables.

# Total complaint_count per Ward for one subset, sorted by descending count,
# with Ward turned into a factor whose level order follows that ranking.
rank_wards <- function(complaints) {
  ward_counts <- complaints %>%
    group_by(Ward) %>%
    summarise(count = sum(complaint_count))
  ward_counts <- ward_counts[order(-ward_counts$count), ]
  transform(ward_counts, Ward = reorder(Ward, -count))
}

# Bar chart of the first ten rows of a ranking produced by rank_wards().
plot_top_wards <- function(ward_counts, title) {
  ggplot(head(ward_counts, 10), aes(x = Ward, y = count)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

ward_rankings <- lapply(list(C1, C2, C3, C4, C5, C6), rank_wards)

# Up to ten busiest wards per complaint type (factors).
topwards_C1 <- head(ward_rankings[[1]]$Ward, 10)
topwards_C2 <- head(ward_rankings[[2]]$Ward, 10)
topwards_C3 <- head(ward_rankings[[3]]$Ward, 10)
topwards_C4 <- head(ward_rankings[[4]]$Ward, 10)
topwards_C5 <- head(ward_rankings[[5]]$Ward, 10)
topwards_C6 <- head(ward_rankings[[6]]$Ward, 10)

top_ward_plots <- Map(
  plot_top_wards,
  ward_rankings,
  as.character(topcomplaints[1:6])
)
do.call(grid.arrange, c(top_ward_plots, list(ncol = 3, nrow = 2)))
In [ ]:
# Trends of top wards over time for each complaint type
# For each top complaint type (C1..C6): yearly totals for that type's top
# wards (topwards_C1..C6), drawn as dodged bars (one bar per year within
# each ward) and arranged in a 2-column, 3-row grid.
#
# Changes from the earlier version:
#   * the six copy-pasted stanzas are replaced by one helper;
#   * every panel now carries its complaint type as a title, like the other
#     per-complaint-type grids; before, the six panels were unlabelled;
#   * the plots are kept in a list rather than assigned to a..f, which hold
#     the raw data frame (`a`) and earlier summary tables.

# Yearly complaint totals per ward for one subset, restricted to top_wards.
#   complaints: rows of the complaints data for one complaint type
#   top_wards:  wards to keep in the plot
#   title:      plot title
plot_ward_year_counts <- function(complaints, top_wards, title) {
  ward_year <- complaints %>%
    group_by(Ward, Year) %>%
    summarise(count = sum(complaint_count))
  ward_year <- ward_year[order(-ward_year$count), ]
  # reorder() ranks each ward by the mean of -count over its yearly rows, so
  # wards with the highest average yearly total come first on the axis.
  ward_year <- transform(ward_year, Ward = reorder(Ward, -count))
  ggplot(
    ward_year[ward_year$Ward %in% top_wards, ],
    aes(x = Ward, y = count)
  ) +
    geom_bar(aes(fill = Year), position = "dodge", stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

ward_year_plots <- Map(
  plot_ward_year_counts,
  list(C1, C2, C3, C4, C5, C6),
  list(
    topwards_C1, topwards_C2, topwards_C3,
    topwards_C4, topwards_C5, topwards_C6
  ),
  as.character(topcomplaints[1:6])
)
do.call(grid.arrange, c(ward_year_plots, list(ncol = 2, nrow = 3)))