tibble(fname=fname_data,
lines=nrow(df_all),
cols=ncol(df_all)) %>%
knitr::kable() %>%
kableExtra::kable_styling("striped")
#> Warning in kableExtra::kable_styling(., "striped"): Please specify format
#> in kable. kableExtra can customize either HTML or LaTeX outputs. See
#> https://haozhu233.github.io/kableExtra/ for details.
fname | lines | cols |
---|---|---|
data/df_all.rds | 705527 | 33 |
ano | func_cod | func | empenhado | liquidado | pago |
---|---|---|---|---|---|
2014 | 06 | Segurança Pública | 27420994 | 27420994 | 27420994 |
2014 | 08 | Assistência Social | 80000 | 0 | 0 |
2014 | 04 | Administração | 0 | 0 | 0 |
2014 | 23 | Comércio e Serviços | 1428294 | 1428294 | 1428294 |
2014 | 20 | Agricultura | 12674 | 12674 | 12674 |
df_all_clean_summary <- df_all_clean %>%
rename_all(~str_replace_all(.,fixed("_"),"-")) %>% # to avoid "_" conflict
summarize_if(is.numeric,list(o0.n=~length(.),
o1.min=~min(.), # labels "a." preserve order
o2.q1=~quantile(.,.25),
o3.median=~median(.),
o4.mean=~mean(.),
o5.q3=~quantile(.,.75),
o6.max=~max(.))) %>%
gather() %>%
separate(key,c("col","stat"),sep="_") %>%
spread(stat,value) %>%
rename_all(~str_remove(.,"(..\\.)"))
#> Warning: attributes are not identical across measure variables;
#> they will be dropped
df_all_clean_summary %>%
knitr::kable(digits=0) %>%
kableExtra::kable_styling("striped")
col | n | min | q1 | median | mean | q3 | max |
---|---|---|---|---|---|---|---|
empenhado | 705527 | 0 | 0 | 1062 | 35762907 | 49241 | 256822620214 |
liquidado | 705527 | 0 | 0 | 960 | 36212086 | 45866 | 256822620214 |
pago | 705527 | 0 | 0 | 330 | 32972451 | 18648 | 256822620214 |
df_all_clean_summary_card <- df_all_clean %>%
rename_all(~str_replace_all(.,fixed("_"),"-")) %>% # to avoid "_" conflict
summarize_if(~!is.numeric(.),list(o0.n=~length(.),
o1.card=~n_distinct(.))) %>%
gather() %>%
separate(key,c("col","stat"),sep="_") %>%
spread(stat,value) %>%
rename_all(~str_remove(.,"(..\\.)"))
df_all_clean_summary_card %>%
knitr::kable(digits=0) %>%
kableExtra::kable_styling("striped")
col | n | card |
---|---|---|
ano | 705527 | 5 |
func | 705527 | 25 |
func-cod | 705527 | 25 |
df_top_funcs_empenhado <- df_all_clean %>%
group_by(func_cod,func) %>%
summarize(n=n(),
empenhado_b=sum(empenhado)/10^9,
pago_b=sum(pago)/10^9,
) %>%
ungroup() %>%
arrange(desc(empenhado_b)) %>%
head(5) %>%
mutate(rank=row_number(),
func=func%>%fct_drop%>%fct_inorder,
func_cod=func_cod%>%fct_drop%>%fct_inorder) %>%
select(rank,everything())
df_top_funcs_empenhado %>%
knitr::kable(digits=0) %>%
kableExtra::kable_styling("striped")
rank | func_cod | func | n | empenhado_b | pago_b |
---|---|---|---|---|---|
1 | 09 | Previdência Social | 11506 | 7478 | 6988 |
2 | 06 | Segurança Pública | 77487 | 3991 | 3365 |
3 | 28 | Encargos Especiais | 15356 | 3696 | 3600 |
4 | 12 | Educação | 235933 | 2520 | 2181 |
5 | 10 | Saúde | 78066 | 1645 | 1127 |
df_all_clean %>%
mutate(func=func%>%fct_reorder(-empenhado,sum)) %>%
filter(as.integer(func)<6) %>%
mutate(func=func%>%fct_drop%>%fct_rev) %>% # for coord_flip
group_by(func) %>%
# milhoes
summarize_at(vars(empenhado,liquidado,pago),
list(~(sum(.)/10^9))) %>%
gather("tipo","valor",-func) %>%
mutate(tipo=factor(tipo,levels=c("pago","liquidado","empenhado"))) %>%
ggplot(aes(func,valor,group=tipo,fill=tipo)) +
geom_col(position="dodge") +
coord_flip() +
labs(title="Totais por Função",
subtitle="Top 5 por empenho total 2014-8",
y="R$ bilhões") +
theme(legend.position = "top",
legend.title = element_blank(),
axis.title.y=element_blank())
df_all_clean %>%
mutate(func=func%>%fct_reorder(-empenhado,sum)) %>%
filter(as.integer(func)<6) %>%
mutate(func=func%>%fct_drop%>%fct_rev) %>% # for coord_flip
group_by(ano,func) %>%
# milhoes
summarize_at(vars(empenhado,liquidado,pago),
list(~(sum(.)/10^9))) %>%
gather("tipo","valor",-func,-ano) %>%
mutate(tipo=factor(tipo,levels=c("pago","liquidado","empenhado"))) %>%
ggplot(aes(func,valor,group=tipo,fill=tipo)) +
geom_col(position="dodge") +
coord_flip() +
labs(title="Totais por Função",
subtitle="Top 5 por empenho total 2014-8",
y="R$ bilhões") +
theme(legend.position = "top",
legend.title = element_blank(),
axis.title.y=element_blank()) +
facet_wrap(~ano)
top_levs <- df_top_funcs_empenhado$func%>%levels
df_all_clean_gathered <- df_all_clean %>%
{ suppressWarnings(inner_join(.,df_top_funcs_empenhado%>%
select(rank,func_cod),
by="func_cod")) } %>%
arrange(rank) %>%
mutate(func=func%>%fct_drop%>%fct_inorder) %>%
group_by(ano,func) %>%
summarise(empenhado=sum(empenhado),
pago=sum(pago)) %>%
ungroup() %>%
gather("tipo","valor",empenhado,pago)
abbrev_func <- function(s_vec) s_vec %>% str_split(" ") %>% map_chr(~.x%>%str_sub(end=3)%>%str_c(collapse="."))
anos <- df_all_clean_gathered$ano%>%as.character%>%unique
p <- df_all_clean_gathered %>%
mutate(tipo=tipo%>%fct_recode(emp="empenhado",pag="pago"),
func=func%>%abbrev_func%>%fct_inorder,
valor=(valor/10^9)%>%round(1),
func_tipo=func%>%str_c("_",tipo)%>%fct_inorder) %>%
ggplot(aes(ano,valor,
group=func_tipo,
color=func,linetype=tipo)) +
scale_linetype_manual(values=c("dotted","solid"))+
geom_line(size=I(1.2)) +
labs(title="Total Anual por Função",
subtitle=sprintf("Anos %s-%s, Top 5",
min(anos),max(anos)),
y = "R$ bilhões") +
theme_minimal() +
theme(legend.position = "bottom",
legend.title=element_blank(),
axis.title.x=element_blank())
ggplotly(p,tooltip=c("func","valor","tipo")) %>%
# hide_legend() %>%
config(displayModeBar = F)%>%
layout(legend = list(orientation="v",font = list(size = 8)))