rehash

petyaracz · Nov 5, 2018 · b870cc9 · b870cc9
1 parent 6bff203
commit b870cc9
Show file tree

Hide file tree

Showing 7 changed files with 692 additions and 372 deletions.
diff --git a/.Rhistory b/.Rhistory
@@ -0,0 +1,300 @@
+library(tidyverse)
+library(rtweet)
+library(countyweather)
+library(lubridate)
+library(itsadug)
+library(knitr)
+getwd()
+# seattle_rain_d = seattle_rain$daily_data
+# write.csv(seattle_rain_d, file = 'seattle_rain.txt', row.names = F)
+seattle_rain = read.csv('Data/seattle_rain.txt')
+View(seattle_rain)
+rain_daily = seattle_rain %>%
+mutate(day = date %>% as.Date) %>% # make a new column called "day" which is a date, not a factor
+select(prcp,day) # keep this and the amount of precipitation
+View(rain_daily)
+ggplotTime = function(df, yval, colour, ylab, title){
+ggplot(df, aes(x = day, y = yval %>% log)) + # plot the df, logged n n.tweets across day
+geom_line(colour = colour) + # draw a line
+scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
+theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
+axis.title.x=element_blank()) + # people can figure out what "OCT 31" is
+ylab(ylab) + # nice ylab
+ggtitle(title)
+}
+ggplotTime(df = rain_daily,
+yval = rain_daily$prcp,
+colour = 'deeppink4',
+ylab = 'daily precipitation, mm',
+title = 'average rain in Seattle (King County)'
+)
+rain_daily$prcp
+ggplot(rain_daily, aes(x = day, y = prcp %>% log)) + # plot the df, logged n n.tweets across day
+geom_line(colour = colour)
+ggplot(rain_daily, aes(x = day, y = prcp %>% log)) + # plot the df, logged n n.tweets across day
+geom_line(colour = 'deeppink4')
+tweets = read.csv('Data/seattle_twitter_hits2.txt')
+tweets_daily = tweets %>%
+mutate(date = ymd_hms(created_at)) %>% # turn date into a nice format
+arrange(date) %>%  # arrange by it
+group_by(day = floor_date(date, 'day') %>% as.Date) %>%  # strip h-m-s and group by days
+summarise(n.tweets = n()) %>%
+mutate(log.tweets = log(n.tweets)) # count how many tweets fall to one day
+ggplotTime = function(df, yval, colour, ylab, title){
+ggplot(df, aes(x = day, y = yval)) + # plot the df, logged n n.tweets across day
+geom_line(colour = colour) + # draw a line
+scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
+theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
+axis.title.x=element_blank()) + # people can figure out what "OCT 31" is
+ylab(ylab) + # nice ylab
+ggtitle(title)
+}
+ggplotTime(df = rain_daily,
+yval = rain_daily$prcp,
+colour = 'deeppink4',
+ylab = 'daily precipitation, mm',
+title = 'average rain in Seattle (King County)'
+)
+ggplotTime(df = tweets_daily,
+yval = tweets_daily$log.tweets,
+colour = 'goldenrod2',
+ylab = 'daily weather tweet count (logged)',
+title = 'tweets on weather round Seattle'
+)
+s_daily = tweets_daily %>% # take daily tweets
+merge(rain_daily) %>%  # merge with daily precipitation
+mutate(n.tweets = scales::rescale(log.tweets), # rescale variables so we can compare them
+prcp = scales::rescale(prcp)) %>%
+gather(variable, value, -day)
+ggplot(s_daily, aes(x = day, y = value, colour = variable)) + # plot the new df, logged n n.tweets across day
+geom_line() + # draw a line
+scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
+theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
+axis.title.x=element_blank(), # people can figure out what "OCT 31" is
+legend.position="top") + # put legend on too
+ylab('daily scaled extent') + # nice ylab
+scale_colour_manual(name = '', breaks = c('n.tweets', 'prcp'), labels = c('tweets (log(n))', 'rain (mm)'), values = c('goldenrod2','deeppink4')) # legend! no name, custom break labels, match colours with previous plots
+ggplot(s_daily, aes(x = day, y = value, colour = variable)) + # plot the new df, logged n n.tweets across day
+geom_line()
+s_daily = tweets_daily %>% # take daily tweets
+merge(rain_daily) %>%  # merge with daily precipitation
+mutate(log.tweets = scales::rescale(log.tweets), # rescale variables so we can compare them
+prcp = scales::rescale(prcp)) %>%
+gather(variable, value, -day)
+s_daily
+s_daily = tweets_daily %>% # take daily tweets
+merge(rain_daily) %>%  # merge with daily precipitation
+select(-n.tweets) %>% # we don't need this
+mutate(log.tweets = scales::rescale(log.tweets), # rescale variables so we can compare them
+prcp = scales::rescale(prcp)) %>%
+gather(variable, value, -day)
+ggplot(s_daily, aes(x = day, y = value, colour = variable)) + # plot the new df, logged n n.tweets across day
+geom_line() + # draw a line
+scale_x_date(date_breaks = '1 day', date_labels = '%b %d %a') + #have an x axis tick per day, in a month-day format
+theme(axis.text.x = element_text(angle = 90, vjust = 0.5),
+axis.title.x=element_blank(), # people can figure out what "OCT 31" is
+legend.position="top") + # put legend on too
+ylab('daily scaled extent') + # nice ylab
+scale_colour_manual(name = '', breaks = c('n.tweets', 'prcp'), labels = c('tweets (log(n))', 'rain (mm)'), values = c('goldenrod2','deeppink4')) # legend! no name, custom break labels, match colours with previous plots
+try(setwd('Work/Bristol/dplace/sccs_blog_post/sccs_blog_post'))
+try(setwd('~/Work/Bristol/dplace/sccs_blog_post/sccs_blog_post'))
+try(setwd('~/Work/Bristol/dplace/sccs_blog_post/sccs_blog_post'))
+ea = read.csv('cousin_paper/rehash/ea_tidy.csv') # ea in long format
+ea = read.csv('ea_tidy.csv') # ea in long format
+mapWorld <- map_data('world', wrap=c(-25,335), ylim=c(-55,75)) # pacific centered
+cousin = ea %>% #from the D-Place/Ethnographic atlas
+filter(title == 'Cousin marriages permitted', !is.na(code)) %>%  # look up cousin marriage
+mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
+select(society,family,region,marry_first_cousin,in_sccs)
+cousin = ea %>%
+filter(title == 'Descent: major type', !is.na(code)) %>%  # now look up descent
+mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>%  # mark societies with patrilineal descent
+select(society,family,region,patrilineal) %>%
+merge(cousin) # merge the two sets
+View(cousin)
+biggest_family <- cousin %>% filter(family!='') %>%
+group_by(family) %>%
+summarise(n = n()) %>%
+arrange(-n) %>%
+head(12) %>%
+pull(family)
+d_big <- d %>% filter(family %in% biggest_family)
+ea_big <- ea %>% filter(family %in% biggest_family)
+names(ea_big)
+cousin_big <- cousin %>%
+filter(family %in% biggest_family)
+names(cousin_big)
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = ea_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
+guides(colour = F) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
+scale_colour_manual(values = rainbow(12))
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
+guides(colour = F) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
+scale_colour_manual(values = rainbow(12))
+ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, d$lon)
+ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, ea$lon)
+cousin = ea %>% #from the D-Place/Ethnographic atlas
+filter(title == 'Cousin marriages permitted', !is.na(code)) %>%  # look up cousin marriage
+mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
+select(society,family,region,marry_first_cousin,in_sccs)
+cousin = ea %>%
+filter(title == 'Descent: major type', !is.na(code)) %>%  # now look up descent
+mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>%  # mark societies with patrilineal descent
+select(society,family,region,patrilineal) %>%
+merge(cousin) # merge the two sets
+ctable = cousin %>%
+group_by(patrilineal,marry_first_cousin) %>%
+summarise(n = n()) %>%
+dcast(patrilineal ~ marry_first_cousin)
+library(tidyverse) # for data wrangling
+cousin = ea %>% #from the D-Place/Ethnographic atlas
+filter(title == 'Cousin marriages permitted', !is.na(code)) %>%  # look up cousin marriage
+mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
+select(society,family,region,marry_first_cousin,in_sccs)
+cousin = ea %>%
+filter(title == 'Descent: major type', !is.na(code)) %>%  # now look up descent
+mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>%  # mark societies with patrilineal descent
+select(society,family,region,patrilineal) %>%
+merge(cousin) # merge the two sets
+ctable = cousin %>%
+group_by(patrilineal,marry_first_cousin) %>%
+summarise(n = n()) %>%
+dcast(patrilineal ~ marry_first_cousin)
+ctable = cousin %>%
+group_by(patrilineal,marry_first_cousin) %>%
+summarise(n = n()) %>%
+reshape2::dcast(patrilineal ~ marry_first_cousin)
+ctable
+biggest_family <- cousin %>% filter(family!='') %>%
+group_by(family) %>%
+summarise(n = n()) %>%
+arrange(-n) %>%
+head(12) %>%
+pull(family)
+cousin_big <- cousin %>%
+filter(family %in% biggest_family)
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
+guides(colour = F) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
+scale_colour_manual(values = rainbow(12))
+ea$lon2
+ea$lon
+cousin
+cousin %>% head
+ea %>% head
+ea = read.csv('Data/ea_tidy.csv') # ea in long format
+ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, ea$lon)
+ea$lon2
+cousin = ea %>% #from the D-Place/Ethnographic atlas
+filter(title == 'Cousin marriages permitted', !is.na(code)) %>%  # look up cousin marriage
+mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
+select(society,family,region,marry_first_cousin,in_sccs)
+cousin = ea %>%
+filter(title == 'Descent: major type', !is.na(code)) %>%  # now look up descent
+mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>%  # mark societies with patrilineal descent
+select(society,family,region,patrilineal) %>%
+merge(cousin) # merge the two sets
+cousin = ea %>% #from the D-Place/Ethnographic atlas
+filter(title == 'Cousin marriages permitted', !is.na(code)) %>%  # look up cousin marriage
+mutate(marry_first_cousin = ifelse(name == 'Any first cousins', T, F)) %>% # mark societies that allow first cousin marriage
+select(society,family,region,marry_first_cousin,in_sccs,lon2,lat)
+cousin = ea %>%
+filter(title == 'Descent: major type', !is.na(code)) %>%  # now look up descent
+mutate(patrilineal = ifelse(name == 'Patrilineal', T, F)) %>%  # mark societies with patrilineal descent
+select(society,family,region,patrilineal) %>%
+merge(cousin) # merge the two sets
+View(cousin)
+View(cousin_big)
+biggest_family <- cousin %>%
+filter(family!='') %>%
+group_by(family) %>%
+summarise(n = n()) %>%
+arrange(-n) %>%
+head(12) %>%
+pull(family)
+cousin_big <- cousin %>%
+filter(family %in% biggest_family)
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = marry_first_cousin)) +
+guides(colour = F) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+labs(caption = '[h]awaiian, [e]skimo, [i]roquois, [c]row/omaha, [s]udanese/descriptive. twelve largest language families.') +
+scale_colour_manual(values = rainbow(12))
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = family, label = ifelse(patrilineal, 'p','m'))) +
+guides(colour = F) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+labs(caption = 'main line of descent: [p]atrilineal or [m]atrilineal. twelve largest language families.') +
+scale_colour_manual(values = rainbow(12))
+cousin
+head(cousin)
+head(ea)
+ea = read.csv('Data/ea_tidy.csv') # ea in long format
+ea$lon2 <- ifelse(ea$lon < -20, ea$lon + 360, ea$lon)
+ea = mutate(ea, cousin_marriage =
+ifelse(EA023 %in% c(7,8), 1,
+ifelse(EA023 %in% c(11,12), 2,
+ifelse(EA023 %in% c(1,2,3,4,5,6,9,13), 3,
+ifelse(EA023 %in% c(10), 4,NA
+)))))
+ea = read.csv('Data/ea_tidy.csv') # ea in long format
+View(ea)
+labs(colour = 'descent type:', caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
+ggplot() +
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = d_big_descent, aes(x = lon2, y = lat, colour = descent_type, label = ifelse(marry_first_cousin, 'a','f'))) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+scale_colour_manual(values = c('violetred','gold4')) +
+scale_fill_continuous(guide = guide_legend()) +
+theme(legend.position="top") +
+guides(colour = guide_legend(override.aes = list(size=11))) +
+labs(colour = 'descent type:', caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = descent_type, label = ifelse(marry_first_cousin, 'a','f'))) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+scale_colour_manual(values = c('violetred','gold4')) +
+scale_fill_continuous(guide = guide_legend()) +
+theme(legend.position="top") +
+guides(colour = guide_legend(override.aes = list(size=11))) +
+labs(colour = 'descent type:', caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = patrilineal, label = ifelse(marry_first_cousin, 'a','f'))) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+scale_colour_manual(values = c('violetred','gold4')) +
+scale_fill_continuous(guide = guide_legend()) +
+theme(legend.position="top") +
+guides(colour = guide_legend(override.aes = list(size=11))) +
+labs(colour = 'descent type:', brackets = c('TRUE', 'FALSE'), labels = c('patrilineal', 'other'), caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = patrilineal, label = ifelse(marry_first_cousin, 'a','f'))) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+scale_colour_manual(values = c('violetred','gold4')) +
+scale_fill_continuous(guide = guide_legend()) +
+theme(legend.position="top") +
+guides(colour = guide_legend(override.aes = list(size=11))) +
+labs(colour = 'descent type:', breaks = c('TRUE', 'FALSE'), labels = c('patrilineal', 'other'), caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
+ggplot() +
+geom_polygon(data = mapWorld, aes(x=long, y = lat, group = group)) +
+geom_label(data = cousin_big, aes(x = lon2, y = lat, colour = patrilineal, label = ifelse(marry_first_cousin, 'a','f'))) +
+theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks=element_blank(), axis.title.y=element_blank(), axis.text.y=element_blank()) +
+scale_fill_continuous(guide = guide_legend()) +
+theme(legend.position="top") +
+guides(colour = guide_legend(override.aes = list(size=11))) +
+scale_colour_manual(name = '', breaks = c('TRUE', 'FALSE'), labels = c('patrilineal', 'other'), values = c('violetred','gold4')) +
+labs(caption = 'marriage to first cousin: [a]llowed / [f]orbidden. twelve largest language families.')
diff --git a/Figs/unnamed-chunk-13-1.png b/Figs/unnamed-chunk-13-1.png
diff --git a/Figs/unnamed-chunk-25-1.png b/Figs/unnamed-chunk-25-1.png
diff --git a/Figs/unnamed-chunk-5-1.png b/Figs/unnamed-chunk-5-1.png