Skip to content

Commit

Permalink
rehash
Browse files Browse the repository at this point in the history
  • Loading branch information
petyaracz committed Nov 5, 2018
1 parent 6bff203 commit b870cc9
Show file tree
Hide file tree
Showing 7 changed files with 692 additions and 372 deletions.
300 changes: 300 additions & 0 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,300 @@
library(tidyverse)
library(rtweet)
library(countyweather)
library(lubridate)
library(itsadug)
library(knitr)
getwd()
# seattle_rain_d = seattle_rain$daily_data
# write.csv(seattle_rain_d, file = 'seattle_rain.txt', row.names = F)
seattle_rain = read.csv('Data/seattle_rain.txt')
View(seattle_rain)
rain_daily = seattle_rain %>%
mutate(day = date %>% as.Date) %>% # make a new column called "day" which is a date, not a factor
select(prcp,day) # keep this and the amount of precipitation
View(rain_daily)
ggplotTime = function(df, yval, colour, ylab, title){
ggplot(df, aes(x = day, y = yval %>% log)) + # plot the df, logged n n.tweets across day
geom_line(colour = colour) + # draw a line
scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.title.x=element_blank()) + # people can figure out what "OCT 31" is
ylab(ylab) + # nice ylab
ggtitle(title)
}
ggplotTime(df = rain_daily,
yval = rain_daily$prcp,
colour = 'deeppink4',
ylab = 'daily precipitation, mm',
title = 'average rain in Seattle (King County)'
)
rain_daily$prcp
ggplot(rain_daily, aes(x = day, y = prcp %>% log)) + # plot the df, logged n n.tweets across day
geom_line(colour = colour)
ggplot(rain_daily, aes(x = day, y = prcp %>% log)) + # plot the df, logged n n.tweets across day
geom_line(colour = 'deeppink4')
tweets = read.csv('Data/seattle_twitter_hits2.txt')
tweets_daily = tweets %>%
mutate(date = ymd_hms(created_at)) %>% # turn date into a nice format
arrange(date) %>% # arrange by it
group_by(day = floor_date(date, 'day') %>% as.Date) %>% # strip h-m-s and group by days
summarise(n.tweets = n()) %>%
mutate(log.tweets = log(n.tweets)) # count how many tweets fall to one day
ggplotTime = function(df, yval, colour, ylab, title){
ggplot(df, aes(x = day, y = yval)) + # plot the df, logged n n.tweets across day
geom_line(colour = colour) + # draw a line
scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.title.x=element_blank()) + # people can figure out what "OCT 31" is
ylab(ylab) + # nice ylab
ggtitle(title)
}
ggplotTime(df = rain_daily,
yval = rain_daily$prcp,
colour = 'deeppink4',
ylab = 'daily precipitation, mm',
title = 'average rain in Seattle (King County)'
)
ggplotTime(df = tweets_daily,
yval = tweets_daily$log.tweets,
colour = 'goldenrod2',
ylab = 'daily weather tweet count (logged)',
title = 'tweets on weather round Seattle'
)
s_daily = tweets_daily %>% # take daily tweets
merge(rain_daily) %>% # merge with daily precipitation
mutate(n.tweets = scales::rescale(log.tweets), # rescale variables so we can compare them
prcp = scales::rescale(prcp)) %>%
gather(variable, value, -day)
ggplot(s_daily, aes(x = day, y = value, colour = variable)) + # plot the new df, logged n n.tweets across day
geom_line() + # draw a line
scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.title.x=element_blank(), # people can figure out what "OCT 31" is
legend.position="top") + # put legend on too
ylab('daily scaled extent') + # nice ylab
scale_colour_manual(name = '', breaks = c('n.tweets', 'prcp'), labels = c('tweets (log(n))', 'rain (mm)'), values = c('goldenrod2','deeppink4')) # legend! no name, custom break labels, match colours with previous plots
ggplot(s_daily, aes(x = day, y = value, colour = variable)) + # plot the new df, logged n n.tweets across day
geom_line()
s_daily = tweets_daily %>% # take daily tweets
merge(rain_daily) %>% # merge with daily precipitation
mutate(log.tweets = scales::rescale(log.tweets), # rescale variables so we can compare them
prcp = scales::rescale(prcp)) %>%
gather(variable, value, -day)
s_daily
s_daily = tweets_daily %>% # take daily tweets
merge(rain_daily) %>% # merge with daily precipitation
select(-n.tweets) %>% # we don't need this
mutate(log.tweets = scales::rescale(log.tweets), # rescale variables so we can compare them
prcp = scales::rescale(prcp)) %>%
gather(variable, value, -day)
ggplot(s_daily, aes(x = day, y = value, colour = variable)) + # plot the new df, logged n n.tweets across day
geom_line() + # draw a line
scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
axis.title.x=element_blank(), # people can figure out what "OCT 31" is
legend.position="top") + # put legend on too
ylab('daily scaled extent') + # nice ylab
scale_colour_manual(name = '', breaks = c('n.tweets', 'prcp'), labels = c('tweets (log(n))', 'rain (mm)'), values = c('goldenrod2','deeppink4')) # legend! no name, custom break labels, match colours with previous plots
try(setwd('Work/Bristol/dplace/sccs_blog_post/sccs_blog_post'))
try(setwd('~/Work/Bristol/dplace/sccs_blog_post/sccs_blog_post'))
try(setwd('~/Work/Bristol/dplace/sccs_blog_post/sccs_blog_post'))
ea = read.csv('cousin_paper/rehash/ea_tidy.csv') # ea in long format
ea = read.csv('ea_tidy.csv') # ea in long format
mapWorld <- map_data('world', wrap=c(-25,335), ylim=c(-55,75)) # pacific centered
cousin = ea %>% #from the D-Place/Ethnographic atlas
filter(title == 'Cousin marriages permitted', !is.na(code)) %>% # look up cousin marriage
mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
select(society,family,region,marry_first_cousin,in_sccs)
cousin = ea %>%
filter(title == 'Descent: major type', !is.na(code)) %>% # now look up descent
mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>% # mark societies with patrilineal descent
select(society,family,region,patrilineal) %>%
merge(cousin) # merge the two sets
View(cousin)
biggest_family <- cousin %>% filter(family!='') %>%
group_by(family) %>%
summarise(n = n()) %>%
arrange(-n) %>%
head(12) %>%
pull(family)
d_big <- d %>% filter(family %in% biggest_family)
ea_big <- ea %>% filter(family %in% biggest_family)
names(ea_big)
cousin_big <- cousin %>%
filter(family %in% biggest_family)
names(cousin_big)
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = ea_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
guides(colour = F) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
scale_colour_manual(values = rainbow(12))
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
guides(colour = F) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
scale_colour_manual(values = rainbow(12))
ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, d$lon)
ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, ea$lon)
cousin = ea %>% #from the D-Place/Ethnographic atlas
filter(title == 'Cousin marriages permitted', !is.na(code)) %>% # look up cousin marriage
mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
select(society,family,region,marry_first_cousin,in_sccs)
cousin = ea %>%
filter(title == 'Descent: major type', !is.na(code)) %>% # now look up descent
mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>% # mark societies with patrilineal descent
select(society,family,region,patrilineal) %>%
merge(cousin) # merge the two sets
ctable = cousin %>%
group_by(patrilineal,marry_first_cousin) %>%
summarise(n = n()) %>%
dcast(patrilineal ~ marry_first_cousin)
library(tidyverse) # for data wrangling
cousin = ea %>% #from the D-Place/Ethnographic atlas
filter(title == 'Cousin marriages permitted', !is.na(code)) %>% # look up cousin marriage
mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
select(society,family,region,marry_first_cousin,in_sccs)
cousin = ea %>%
filter(title == 'Descent: major type', !is.na(code)) %>% # now look up descent
mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>% # mark societies with patrilineal descent
select(society,family,region,patrilineal) %>%
merge(cousin) # merge the two sets
ctable = cousin %>%
group_by(patrilineal,marry_first_cousin) %>%
summarise(n = n()) %>%
dcast(patrilineal ~ marry_first_cousin)
ctable = cousin %>%
group_by(patrilineal,marry_first_cousin) %>%
summarise(n = n()) %>%
reshape2::dcast(patrilineal ~ marry_first_cousin)
ctable
biggest_family <- cousin %>% filter(family!='') %>%
group_by(family) %>%
summarise(n = n()) %>%
arrange(-n) %>%
head(12) %>%
pull(family)
cousin_big <- cousin %>%
filter(family %in% biggest_family)
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
guides(colour = F) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
scale_colour_manual(values = rainbow(12))
ea$lon2
ea$lon
cousin
cousin %>% head
ea %>% head
ea = read.csv('Data/ea_tidy.csv') # ea in long format
ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, ea$lon)
ea$lon2
cousin = ea %>% #from the D-Place/Ethnographic atlas
filter(title == 'Cousin marriages permitted', !is.na(code)) %>% # look up cousin marriage
mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
select(society,family,region,marry_first_cousin,in_sccs)
cousin = ea %>%
filter(title == 'Descent: major type', !is.na(code)) %>% # now look up descent
mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>% # mark societies with patrilineal descent
select(society,family,region,patrilineal) %>%
merge(cousin) # merge the two sets
cousin = ea %>% #from the D-Place/Ethnographic atlas
filter(title == 'Cousin marriages permitted', !is.na(code)) %>% # look up cousin marriage
mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
select(society,family,region,marry_first_cousin,in_sccs,lon2,lat)
cousin = ea %>%
filter(title == 'Descent: major type', !is.na(code)) %>% # now look up descent
mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>% # mark societies with patrilineal descent
select(society,family,region,patrilineal) %>%
merge(cousin) # merge the two sets
View(cousin)
View(cousin_big)
biggest_family <- cousin %>%
filter(family!='') %>%
group_by(family) %>%
summarise(n = n()) %>%
arrange(-n) %>%
head(12) %>%
pull(family)
cousin_big <- cousin %>%
filter(family %in% biggest_family)
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
guides(colour = F) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
scale_colour_manual(values = rainbow(12))
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = ifelse(patrilineal, 'p','m'))) +
guides(colour = F) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
labs(caption = 'main line of descent: [p]atrilineal or [m]atrilineal. twelve largest language families.') +
scale_colour_manual(values = rainbow(12))
cousin
head(cousin)
head(ea)
ea = read.csv('Data/ea_tidy.csv') # ea in long format
ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, ea$lon)
ea = mutate(ea, cousin_marriage =
ifelse(EA023 %in% c(7,8), 1,
ifelse(EA023 %in% c(11,12), 2,
ifelse(EA023 %in% c(1,2,3,4,5,6,9,13), 3,
ifelse(EA023 %in% c(10), 4,NA
)))))
ea = read.csv('Data/ea_tidy.csv') # ea in long format
View(ea)
labs(colour = 'descent type:', caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
ggplot() +
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = d_big_descent, aes(x = lon2, y = lat, colour = descent_type, label = ifelse(marry_first_cousin, 'a','f'))) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
scale_colour_manual(values = c('violetred','gold4')) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="top") +
guides(colour = guide_legend(override.aes = list(size=11))) +
labs(colour = 'descent type:', caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = descent_type, label = ifelse(marry_first_cousin, 'a','f'))) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
scale_colour_manual(values = c('violetred','gold4')) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="top") +
guides(colour = guide_legend(override.aes = list(size=11))) +
labs(colour = 'descent type:', caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = patrilineal, label = ifelse(marry_first_cousin, 'a','f'))) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
scale_colour_manual(values = c('violetred','gold4')) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="top") +
guides(colour = guide_legend(override.aes = list(size=11))) +
labs(colour = 'descent type:', brackets = c('TRUE', 'FALSE'), labels = c('patrilineal', 'other'), caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = patrilineal, label = ifelse(marry_first_cousin, 'a','f'))) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
scale_colour_manual(values = c('violetred','gold4')) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="top") +
guides(colour = guide_legend(override.aes = list(size=11))) +
labs(colour = 'descent type:', breaks = c('TRUE', 'FALSE'), labels = c('patrilineal', 'other'), caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
ggplot() +
geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = patrilineal, label = ifelse(marry_first_cousin, 'a','f'))) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="top") +
guides(colour = guide_legend(override.aes = list(size=11))) +
scale_colour_manual(name = '', breaks = c('TRUE', 'FALSE'), labels = c('patrilineal', 'other'), values = c('violetred','gold4')) +
labs(caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
Binary file added Figs/unnamed-chunk-13-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Figs/unnamed-chunk-25-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Figs/unnamed-chunk-5-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit b870cc9

Please sign in to comment.