In [ ]:
library(dplyr)
library(ggplot2)
library(plotly)
# Read the rolled-up complaint data into `a` and show its column structure.
a <- read.csv(
  file = "/home/mansiarora/Documents/DataCor_egov/Data/ddive_rolled_seq.csv"
)
str(object = a)

In [ ]:
# Parse Complaint.Date ("YYYY-MM-DD") and derive the calendar fields that the
# grouping cells below rely on. Month/Year/Week are kept as formatted text.
a[["Complaint.Date"]] <- as.Date(a[["Complaint.Date"]], format = "%Y-%m-%d")
a[["Month"]] <- format(a[["Complaint.Date"]], format = "%m")
a[["Year"]] <- format(a[["Complaint.Date"]], format = "%Y")
a[["Week"]] <- format(a[["Complaint.Date"]], format = "%W")
a[["Weekday"]] <- weekdays(a[["Complaint.Date"]])
# Combined keys of the form "<week>_<year>" and "<month>_<year>".
a[["Week_Year"]] <- paste0(a[["Week"]], "_", a[["Year"]])
a[["Month_Year"]] <- paste0(a[["Month"]], "_", a[["Year"]])

In [ ]:
# Counts by complaint type
# Total complaint_count per Complaint.Type, sorted largest first. The type is
# turned into a factor whose levels follow descending count so the bars are
# drawn in rank order.
b <- a %>%
  group_by(Complaint.Type) %>%
  summarise(count = sum(complaint_count))
b <- b[order(-b$count), ]
b <- transform(b, Complaint.Type = reorder(Complaint.Type, -count))
# head() instead of b[1:20, ]: with fewer than 20 types, 1:20 would pad the
# data with all-NA rows.
ggplot(head(b, 20), aes(x = Complaint.Type, y = count)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Up to ten most frequent complaint types; reused by later cells.
topcomplaints <- head(unique(b$Complaint.Type), 10)

In [ ]:
# Counts by Ward
# Total complaint_count per Ward, sorted largest first. Ward becomes a factor
# whose levels follow descending count so the bars are drawn in rank order.
c <- a %>%
  group_by(Ward) %>%
  summarise(count = sum(complaint_count))
c <- c[order(-c$count), ]
c <- transform(c, Ward = reorder(Ward, -count))
# head() instead of c[1:20, ]: with fewer than 20 wards, 1:20 would pad the
# data with all-NA rows.
ggplot(head(c, 20), aes(x = Ward, y = count)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Up to ten busiest wards; reused by later cells.
topwards <- head(unique(c$Ward), 10)
topwards

In [ ]:
# Counts by complaint type and ward
# One row per (Complaint.Type, Ward) pair with its summed complaint_count,
# ordered from the largest total to the smallest.
d <- summarise(
  group_by(a, Complaint.Type, Ward),
  count = sum(complaint_count)
)
d <- d[order(desc(d$count)), ]

In [ ]:
# top 5 complaints of each ward
# For every ward in `topwards`, keep its (up to) five largest Complaint.Type
# rows from `d` and stack them into `e`. The chart then shows each ward's mix
# of those types as proportions (position = "fill"). No "Others" bucket is
# added, matching the previous behaviour where that step was commented out.
top5_by_ward <- lapply(seq_along(topwards), function(k) {
  # which() drops NA comparisons so wards with missing values add no NA rows.
  ward_rows <- d[which(d$Ward == topwards[k]), ]
  # Sort here as well so the result does not depend on `d` being pre-sorted.
  ward_rows <- ward_rows[order(-ward_rows$count), ]
  # head() returns fewer rows when a ward has under five complaint types,
  # where ward_rows[1:5, ] would pad with NA rows.
  as.data.frame(head(ward_rows, 5))
})
e <- do.call(rbind, top5_by_ward)
ggplot(e, aes(x = Ward, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill")

In [ ]:
# Counts by complaint date (month)
# Summed complaint_count per Month value (years pooled), largest first.
f <- summarise(group_by(a, Month), count = sum(complaint_count))
f <- f[order(desc(f$count)), ]
ggplot(data = f, mapping = aes(x = Month, y = count)) +
  geom_bar(stat = "identity")

In [ ]:
# Summed complaint_count per Year, largest first, shown as a bar chart.
g <- summarise(group_by(a, Year), count = sum(complaint_count))
g <- g[order(desc(g$count)), ]
ggplot(data = g, mapping = aes(x = Year, y = count)) +
  geom_bar(stat = "identity")

In [ ]:
# Summed complaint_count per (Month, Year) pair, largest first.
h <- summarise(group_by(a, Month, Year), count = sum(complaint_count))
h <- h[order(desc(h$count)), ]
# Share of each year within every month ...
ggplot(data = h, mapping = aes(x = Month, y = count, fill = Year)) +
  geom_bar(stat = "identity", position = "fill")
# ... and share of each month within every year.
ggplot(data = h, mapping = aes(x = Year, y = count, fill = Month)) +
  geom_bar(stat = "identity", position = "fill")

In [ ]:
# Trend of top complaint types over time
# Yearly totals restricted to the types in `topcomplaints`, drawn as each
# type's proportion of the year's total.
i <- summarise(
  group_by(a[a$Complaint.Type %in% topcomplaints, ], Complaint.Type, Year),
  count = sum(complaint_count)
)
i <- i[order(desc(i$count)), ]
ggplot(data = i, mapping = aes(x = Year, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill")

# Monthly totals (years pooled) for the types in `topcomplaints`, drawn as
# each type's proportion of the month's total.
j <- summarise(
  group_by(a[a$Complaint.Type %in% topcomplaints, ], Complaint.Type, Month),
  count = sum(complaint_count)
)
j <- j[order(desc(j$count)), ]
ggplot(data = j, mapping = aes(x = Month, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill")

# Totals per (Complaint.Type, Month_Year) for the types in `topcomplaints`,
# drawn as each type's proportion within every month of every year.
k <- a[a$Complaint.Type %in% topcomplaints, ] %>%
  group_by(Complaint.Type, Month_Year) %>%
  summarise(count = sum(complaint_count))
k <- k[order(-k$count), ]
# Month_Year is "<month>_<year>" text, so a plain character axis sorts by
# month first (all Januaries together). Turn it into a factor whose levels
# are ordered by year, then month, so the x axis reads chronologically.
period_labels <- unique(k$Month_Year)
period_labels <- period_labels[order(
  sub("^.*_", "", period_labels),
  sub("_.*$", "", period_labels)
)]
k$Month_Year <- factor(k$Month_Year, levels = period_labels)
ggplot(k, aes(x = Month_Year, y = count, fill = Complaint.Type)) +
  geom_bar(stat = "identity", position = "fill") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.4, vjust = 0.1))

In [ ]:
# Trend of top wards over time
# Yearly totals restricted to the wards in `topwards`.
l <- summarise(
  group_by(a[a$Ward %in% topwards, ], Ward, Year),
  count = sum(complaint_count)
)
l <- l[order(desc(l$count)), ]
# Ward as a factor whose levels follow the order of `topwards`.
l$Ward <- factor(l$Ward, levels = topwards)
# Absolute stacked totals, then the same data as proportions per year.
ggplot(data = l, mapping = aes(x = Year, y = count, fill = Ward)) +
  geom_bar(stat = "identity")
ggplot(data = l, mapping = aes(x = Year, y = count, fill = Ward)) +
  geom_bar(stat = "identity", position = "fill")

# Monthly totals (years pooled) restricted to the wards in `topwards`.
m <- a[a$Ward %in% topwards, ] %>%
  group_by(Ward, Month) %>%
  summarise(count = sum(complaint_count))
m <- m[order(-m$count), ]
# Same treatment as the per-year ward plot: make Ward a factor whose levels
# follow `topwards`, so the fill is discrete and ordered like the sibling chart.
m$Ward <- factor(m$Ward, levels = topwards)
# Absolute stacked totals, then the same data as proportions per month.
ggplot(m, aes(x = Month, y = count, fill = Ward)) +
  geom_bar(stat = "identity")
ggplot(m, aes(x = Month, y = count, fill = Ward)) +
  geom_bar(stat = "identity", position = "fill")

In [ ]:
#install.packages("gridExtra")
library("gridExtra")

# Density plot
# C1..C6 hold the rows of `a` for the first six entries of `topcomplaints`
# (later cells reuse them). D1..D6 are density curves of complaint_count for
# each subset, arranged on a 3 x 2 grid.
C1 <- a[a$Complaint.Type == topcomplaints[1], ]
C2 <- a[a$Complaint.Type == topcomplaints[2], ]
C3 <- a[a$Complaint.Type == topcomplaints[3], ]
C4 <- a[a$Complaint.Type == topcomplaints[4], ]
C5 <- a[a$Complaint.Type == topcomplaints[5], ]
C6 <- a[a$Complaint.Type == topcomplaints[6], ]

# Density curve of complaint_count for one subset.
density_of_counts <- function(rows) {
  ggplot(data = rows, mapping = aes(x = complaint_count)) +
    geom_density()
}

D1 <- density_of_counts(C1)
D2 <- density_of_counts(C2)
D3 <- density_of_counts(C3)
D4 <- density_of_counts(C4)
D5 <- density_of_counts(C5)
D6 <- density_of_counts(C6)

grid.arrange(D1, D2, D3, D4, D5, D6, ncol = 3, nrow = 2)

In [ ]:
# Complaint Count by date
# Daily complaint totals for each of the six top complaint types (C1..C6),
# drawn as a 3 x 2 grid of bar charts. The plots are collected in
# `date_plots` rather than in variables named a..f, which would overwrite the
# source data frame `a` and the earlier summaries b..f.

# Sum complaint_count per Complaint.Date for one complaint-type subset.
count_by_date <- function(rows) {
  rows %>%
    group_by(Complaint.Date) %>%
    summarise(count = sum(complaint_count))
}

# Bar chart of daily totals with a small title.
plot_count_by_date <- function(counts, title, title_size = 10) {
  ggplot(counts, aes(x = Complaint.Date, y = count)) +
    geom_bar(stat = "identity") +
    ggtitle(title) +
    theme(plot.title = element_text(size = title_size))
}

C1a <- count_by_date(C1)
C2a <- count_by_date(C2)
C3a <- count_by_date(C3)
C4a <- count_by_date(C4)
C5a <- count_by_date(C5)
C6a <- count_by_date(C6)

date_plots <- Map(
  plot_count_by_date,
  list(C1a, C2a, C3a, C4a, C5a, C6a),
  as.character(topcomplaints[1:6]),
  c(10, 10, 10, 10, 10, 8) # the sixth title keeps its smaller size
)

grid.arrange(grobs = date_plots, ncol = 3, nrow = 2)

In [ ]:
# Complaint Count by Year
# Yearly complaint totals for each of the six top complaint types (C1..C6),
# drawn as a 3 x 2 grid of bar charts. The plots are collected in
# `year_plots` rather than in variables named a..f, which would overwrite the
# source data frame `a` and the earlier summaries b..f.

# Sum complaint_count per Year for one complaint-type subset.
count_by_year <- function(rows) {
  rows %>%
    group_by(Year) %>%
    summarise(count = sum(complaint_count))
}

# Bar chart of yearly totals with a small title.
plot_count_by_year <- function(counts, title) {
  ggplot(counts, aes(x = Year, y = count)) +
    geom_bar(stat = "identity") +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

C1b <- count_by_year(C1)
C2b <- count_by_year(C2)
C3b <- count_by_year(C3)
C4b <- count_by_year(C4)
C5b <- count_by_year(C5)
C6b <- count_by_year(C6)

year_plots <- Map(
  plot_count_by_year,
  list(C1b, C2b, C3b, C4b, C5b, C6b),
  as.character(topcomplaints[1:6])
)

grid.arrange(grobs = year_plots, ncol = 3, nrow = 2)

In [ ]:
# Complaint Count by Week
# Complaint totals per Week value (years pooled) for each of the six top
# complaint types (C1..C6), drawn as a 3 x 2 grid of line charts. The plots
# are collected in `week_plots` rather than in variables named a..f, which
# would overwrite the source data frame `a` and the earlier summaries b..f.

# Sum complaint_count per Week for one complaint-type subset.
count_by_week <- function(rows) {
  rows %>%
    group_by(Week) %>%
    summarise(count = sum(complaint_count))
}

# Line chart of weekly totals. Week is text, so group = 1 is needed to join
# the points into a single line.
plot_count_by_week <- function(counts, title) {
  ggplot(counts, aes(x = Week, y = count)) +
    geom_line(group = 1) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

C1c <- count_by_week(C1)
C2c <- count_by_week(C2)
C3c <- count_by_week(C3)
C4c <- count_by_week(C4)
C5c <- count_by_week(C5)
C6c <- count_by_week(C6)

week_plots <- Map(
  plot_count_by_week,
  list(C1c, C2c, C3c, C4c, C5c, C6c),
  as.character(topcomplaints[1:6])
)

grid.arrange(grobs = week_plots, ncol = 3, nrow = 2)

In [ ]:
# Complaint Count by Weekday
# Complaint totals per weekday for each of the six top complaint types
# (C1..C6), drawn as a 3 x 2 grid of line charts running Monday to Sunday.
# The plots are collected in `weekday_plots` rather than in variables named
# a..f, which would overwrite the source data frame `a` and the earlier
# summaries b..f.

# Short axis labels, Monday first.
days <- c("Mon", "Tue", "Wed", "Thurs", "Fri", "Sat", "Sun")
# Full day names in the same Monday-to-Sunday order, produced by weekdays()
# itself so they match however the Weekday column was spelled in this session
# (2024-01-01 is a Monday).
day_names <- weekdays(as.Date("2024-01-01") + 0:6)

# Sum complaint_count per Weekday for one complaint-type subset and convert
# Weekday to a factor in calendar order with the short labels.
# Assigning levels(x) <- days to the character column, as done previously,
# does not create a factor, so the axis stayed alphabetical; on a real factor
# it would have relabelled the alphabetically sorted days incorrectly.
count_by_weekday <- function(rows) {
  counts <- rows %>%
    group_by(Weekday) %>%
    summarise(count = sum(complaint_count))
  counts$Weekday <- factor(counts$Weekday, levels = day_names, labels = days)
  counts
}

# Line chart of weekday totals; group = 1 joins the discrete x values.
plot_count_by_weekday <- function(counts, title) {
  ggplot(counts, aes(x = Weekday, y = count)) +
    geom_line(group = 1) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
}

C1d <- count_by_weekday(C1)
C2d <- count_by_weekday(C2)
C3d <- count_by_weekday(C3)
C4d <- count_by_weekday(C4)
C5d <- count_by_weekday(C5)
C6d <- count_by_weekday(C6)

weekday_plots <- Map(
  plot_count_by_weekday,
  list(C1d, C2d, C3d, C4d, C5d, C6d),
  as.character(topcomplaints[1:6])
)

grid.arrange(grobs = weekday_plots, ncol = 3, nrow = 2)

In [ ]:
# Top wards of each complaint type
# For each of the six top complaint types (C1..C6), rank wards by total
# complaint_count, chart the ten busiest, and remember them in
# topwards_C1..topwards_C6 for the next cell. The plots are collected in
# `ward_plots` rather than in variables named a..f, which would overwrite the
# source data frame `a` and the earlier summaries b..f.

# Per-ward totals sorted largest first, with Ward as a factor whose levels
# follow descending count so bars are drawn in rank order.
rank_wards_by_count <- function(rows) {
  counts <- rows %>%
    group_by(Ward) %>%
    summarise(count = sum(complaint_count))
  counts <- counts[order(-counts$count), ]
  transform(counts, Ward = reorder(Ward, -count))
}

# Bar chart of the ten busiest wards. head() avoids the all-NA padding rows
# that ranked[1:10, ] produces when fewer than ten wards are present.
plot_top_wards <- function(ranked, title) {
  ggplot(head(ranked, 10), aes(x = Ward, y = count)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

C1e <- rank_wards_by_count(C1)
C2e <- rank_wards_by_count(C2)
C3e <- rank_wards_by_count(C3)
C4e <- rank_wards_by_count(C4)
C5e <- rank_wards_by_count(C5)
C6e <- rank_wards_by_count(C6)

topwards_C1 <- head(C1e, 10)$Ward
topwards_C2 <- head(C2e, 10)$Ward
topwards_C3 <- head(C3e, 10)$Ward
topwards_C4 <- head(C4e, 10)$Ward
topwards_C5 <- head(C5e, 10)$Ward
topwards_C6 <- head(C6e, 10)$Ward

ward_plots <- Map(
  plot_top_wards,
  list(C1e, C2e, C3e, C4e, C5e, C6e),
  as.character(topcomplaints[1:6])
)

grid.arrange(grobs = ward_plots, ncol = 3, nrow = 2)

In [ ]:
# Trends of top wards over time for each complaint type
# For each of the six top complaint types (C1..C6), total complaint_count per
# (Ward, Year) and chart the type's top wards (topwards_C1..topwards_C6) with
# one dodged bar per year. The plots are collected in `ward_year_plots` rather
# than in variables named a..f, which would overwrite the source data frame
# `a` and the earlier summaries b..f.

# Per (Ward, Year) totals sorted largest first, with Ward as a factor ordered
# via reorder(Ward, -count).
count_by_ward_year <- function(rows) {
  counts <- rows %>%
    group_by(Ward, Year) %>%
    summarise(count = sum(complaint_count))
  counts <- counts[order(-counts$count), ]
  transform(counts, Ward = reorder(Ward, -count))
}

# Dodged bar chart restricted to `top_wards`. A title is added, as in the
# other panel grids, so the six panels can be told apart.
plot_ward_year <- function(counts, top_wards, title) {
  ggplot(counts[counts$Ward %in% top_wards, ], aes(x = Ward, y = count)) +
    geom_bar(aes(fill = Year), position = "dodge", stat = "identity") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
    ggtitle(title) +
    theme(plot.title = element_text(size = 10))
}

C1f <- count_by_ward_year(C1)
C2f <- count_by_ward_year(C2)
C3f <- count_by_ward_year(C3)
C4f <- count_by_ward_year(C4)
C5f <- count_by_ward_year(C5)
C6f <- count_by_ward_year(C6)

ward_year_plots <- Map(
  plot_ward_year,
  list(C1f, C2f, C3f, C4f, C5f, C6f),
  list(
    topwards_C1, topwards_C2, topwards_C3,
    topwards_C4, topwards_C5, topwards_C6
  ),
  as.character(topcomplaints[1:6])
)

grid.arrange(grobs = ward_year_plots, ncol = 2, nrow = 3)