Mapping Collaborations in Research Projects Funded by the SNSF in 2017.

Data

SNSF grant data is here.

# grants <- read_rds("grants.rds.bz2") %>%
grants <- rio::import("http://p3.snf.ch/P3Export/P3_GrantExport.csv") %>% 
  janitor::clean_names() %>%
  select(project_number, start_date, end_date) %>%
  mutate(start_date = dmy(start_date), end_date = dmy(end_date)) %>%
  filter(end_date >= TIME_STAMP, start_date <= TIME_STAMP)

# people <- read_rds("ppl.rds.bz2") %>%
people <- rio::import("http://p3.snf.ch/P3Export/P3_PersonExport.csv") %>%
  janitor::clean_names() %>%
  select(
    person_id_snsf, 
    institute_place,
    projects_as_responsible_applicant,
    projects_as_applicant, 
    projects_as_partner,
    projects_as_practice_partner,
    projects_as_employee) %>%
  # for consistency
  unite(project_number, starts_with("projects"), sep = ";") %>%
  # remove missing affiliations
  filter(institute_place != "") %>%
  mutate(
    institute_place = str_replace_all(
      institute_place,
      "^[A-Z]{2}[ ]| [(](.*)[)]| Cedex(.*)?| [0-9]{1,2}", ""),
    # unite() includes NAs...
    project_number = str_replace_all(project_number, "NA", ""),
    project_number = str_replace_all(project_number, "[;]+", ";"),
    project_number = str_replace_all(project_number, "^[;]|[;]$", ""),
    # to unnest
    project_number = str_split(project_number, ";")) %>% 
  unnest() %>%
  # for consistency with grants
  distinct() %>%
  semi_join(grants, by = "project_number")

# wrapper function to count the number
# of distinct *institute_place* per *project_number*
loc_count <- function(dat) {
  group_by(dat, project_number) %>%
    summarise(n_loc = n_distinct(institute_place))
}

# network core: grants involving multiple places
core <- people %>%
  select(-person_id_snsf) %>%
  distinct() %>%
  semi_join(filter(loc_count(people), n_loc > 1), by = "project_number") %>%
  arrange(project_number)

head(core)
##   institute_place project_number
## 1        Konstanz         128565
## 2            Bern         128565
## 3          Zürich         135192
## 4      St. Gallen         135192
## 5      St. Gallen         135970
## 6         Rostock         135970

We need geocodes for all the places listed in the dataset. Information on how to get geocode data in R is given here and here. In essence, I use the google maps API as a fallback of the openstreetmaps API. To save the effort of writing html requests, I use dedicated R packages to query map data. And to speed up the analysis, I store the geocodes in geocodes.csv.

# initialize geocodes as a tibble
geocodes <- tibble(
  place = sort(unique(core$institute_place)),
  lat = rep(NA_real_, n_distinct(core$institute_place)),
  lon = rep(NA_real_, n_distinct(core$institute_place)),
  addr = rep(NA_character_, n_distinct(core$institute_place)),
  id = rep(NA_character_, n_distinct(core$institute_place)))

p_load_gh("hrbrmstr/nominatim", "ggmap") # ggmap needs `libpng16-dev` (on linux)
osm_key <- readLines("mapquest.key")

# openstreetmap api
osm <- function(query, osm_key) {
  r <- osm_search_spatial(query, limit = 1, key = osm_key)
  if (!is.null(r[[1]])) {
    c(r[[1]]$place_id, r[[1]]$display_name, r[[1]]$lat, r[[1]]$lon)
  } else return(NA)
}

# googlemaps api (2500 reqs/day, 50 reqs/sec max)
google <- function(query) {
  r <- geocode(query, output = "all")
  if (r$status == "OK") {
    c(r$results[[1]]$place_id, r$results[[1]]$formatted_address,
      r$results[[1]]$geometry$location$lat, r$results[[1]]$geometry$location$lng)
  } else return(NA)
}

for (k in seq_along(geocodes$place)) {
  cat("   ........ ", k, ": ", geocodes$place[k], "\n")
  info <- osm(iconv(geocodes$place[k], to="ASCII//TRANSLIT"), osm_key)
  cat("   osm info: ", info, "\n")
  # fallback
  if (is.na(info)) {
    info <- google(geocodes$place[k])
    cat("    google: ", info, "\n")
  }
  # store info in meta
  if (!is.na(info)) {
    geocodes$id[k] = info[1]
    geocodes$addr[k] = info[2]
    geocodes$lat[k] = as.numeric(info[3])
    geocodes$lon[k] = as.numeric(info[4])
  }
}
# it works despite of some warnings...
rio::export(geocodes, "geocodes.csv")

Collaborations within Switzerland

Credits: kateto.net.

geocodes <- rio::import("geocodes.csv") %>% drop_na()

core <- core %>% 
  left_join(geocodes, by = c("institute_place" = "place")) %>%
  mutate(
    country = map_chr(strsplit(addr, ", "), tail, n = 1),
    # unify names between google maps and openstreetmaps
    country = str_replace_all(country, "^United Kingdom$", "UK"),
    country = str_replace_all(country, "^Russian Federation$", "Russia"),
    country = str_replace_all(country, "^United States of America$", "USA"),
    country = str_replace_all(country, "^The Netherlands$", "Netherlands"))

# build graph elements ----------------------------------------------------

make_nodes <- function(df_core) {
  group_by(df_core, institute_place) %>%
  summarise(x = head(lon, 1), y = head(lat, 1), size = n()) %>%
  arrange(desc(size))
}

make_edges <- function(df_core) {
  triangulation <- function(proj_no, df) {
    links <- filter(df, project_number == proj_no) %>%
      select(institute_place) %$%
      combn(sort(institute_place), m = 2)
    data.frame(
    number = rep(proj_no, ncol(links)),
    from = links[1, ],
    to = links[2,],
    stringsAsFactors = FALSE)
  }
  unique_numbers <- unique(df_core$project_number)
  edges_ch <- purrr::map(unique_numbers, triangulation, df = df_core)
  do.call(bind_rows, edges_ch)
}

# Switzerland -------------------------------------------------------------

core_ch <- filter(core, country == "Switzerland")
# swiss core: projects involving multiple places in Switzerland
core_ch <- core_ch %>%
  semi_join(filter(loc_count(core_ch), n_loc > 1), by = "project_number")

# nodes (places) ----------------------------------------------------------
nodes_ch <- make_nodes(core_ch)
# edges (grants) ----------------------------------------------------------
edges_ch <- make_edges(core_ch)

# add geocode metadata and group
edges_ch <- edges_ch %>%
  left_join(select(nodes_ch, -size), by = c("from" = "institute_place")) %>%
  rename(x1 = x, y1 = y) %>%
  left_join(select(nodes_ch, -size), by = c("to" = "institute_place")) %>%
  rename(x2 = x, y2 = y) %>%
  group_by(from, to) %>%
  summarise(
    x1 = head(x1, 1), y1 = head(y1, 1),
    x2 = head(x2, 1), y2 = head(y2, 1),
    strength = n())

# mapping -----------------------------------------------------------------

p_load("maps", "mapdata", "geosphere")

# color ramp
make_ramp <- function(alpha1 = .5, alpha2 = .2, 
                      col1 = "#194CB2", col2 = "#194CB2", 
                      n_shades = 100) {
  col1 = adjustcolor(col1, alpha.f = alpha1)
  col2 = adjustcolor(col2, alpha.f = alpha2)
  colorRampPalette(c(col2, col1), alpha = TRUE)(n_shades)
}

# map nodes
add_nodes <- function(nodes, ramp, cex_scale = 4) {
  with(nodes, {
    # scale node size
    s <- size /  max(size)
    points(x = x, y = y, 
           pch = 16, cex = cex_scale * sqrt(s),
           col = ramp[round(100 * s)])
  })
}

# map edges
add_edges <- function(edges, ramp, lwd_scale = 6) {
  with(edges, {
    for (k in 1:nrow(edges))  {
      arc <- gcIntermediate(c(x1[k], y1[k]), c(x2[k], y2[k]), n = 100, addStartEnd = TRUE)
      s <- strength[k] / max(strength)
      lines(arc, col = ramp[round(100 * s)], lwd = lwd_scale * sqrt(s))
    }
  })
}

# svg("map_ch.svg", width = 12, height = 7)
par(mar = c(0, 0, 0, 0))
maps::map(database = "worldHires", regions = "Switzerland", 
          lty = 0, fill = TRUE, col = rgb(0, 0, 0, .1))

edge_col <- make_ramp()
node_col <- gray.colors(100, start = 0.6, end = 0.3, alpha = .7)

add_nodes(nodes_ch, ramp = node_col)
add_edges(edges_ch, ramp = edge_col)

# label university cities
uni <- c("Zürich", "Lausanne", "Bern", "Geneva","Basel", "Fribourg", "Neuchâtel", "St. Gallen", "Lugano", "Lucerne")
uni_nodes <- filter(nodes_ch, institute_place %in% uni)
with(uni_nodes, {
  text(x, y + 0.03, institute_place, 
    pos = 3, cex = .7, col = rgb(0, 0, 0, .85))
})

legend_points <- tibble(institute_place = c("50", "500"),
                 x = c(9.75, 9.75),
                 y = c(46.05, 45.9),
                 size = c(50, 500))

with(legend_points, {
  text(x + 0.08, y, str_c(institute_place, " Collabs"), 
    pos = 4, cex = .7, col = rgb(0, 0, 0, .85))
})

add_nodes(legend_points, ramp = node_col)

# invisible(dev.off())

Collaborations within Europe

close_countries <- c(
  "Switzerland", "Austria", "Belgium", "Bulgaria", "Croatia",
  "Cyprus", "Czech Republic", "Denmark", "Estonia", "Finland", "France",
  "Germany", "Greece", "Hungary", "Ireland", "Italy", "Latvia", 
  "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland", "Portugal",
  "Romania", "Slovakia", "Slovenia", "Spain", "Sweden", "UK",
  "Serbia", "Bosnia and Herzegovina", "Moldova", "Norway",
  "Georgia", "Turkey", "Armenia", "Ukraine", "Iceland", "Montenegro",
  "Albania", "Macedonia", "Lichtenstein", "Andorra")
not_mapped <- c("Israel", "Morocco", "Algeria", "Tunisia", "Libya", 
                "Egypt", "Jordan", "Lebanon", "Syria", "Russia")

core_eu <- filter(core, country %in% c(close_countries, not_mapped))
core_eu <- core_eu %>%
  semi_join(filter(loc_count(core_eu), n_loc > 1), by = "project_number")

nodes_eu <- make_nodes(core_eu)
edges_eu <- make_edges(core_eu)

# add geocode metadata and group
edges_eu <- edges_eu %>%
  left_join(select(nodes_eu, -size), by = c("from" = "institute_place")) %>%
  rename(x1 = x, y1 = y) %>%
  left_join(select(nodes_eu, -size), by = c("to" = "institute_place")) %>%
  rename(x2 = x, y2 = y) %>%
  group_by(from, to) %>%
  summarise(
    x1 = head(x1, 1), y1 = head(y1, 1),
    x2 = head(x2, 1), y2 = head(y2, 1),
    strength = n())

nodes_eu <- anti_join(nodes_eu, nodes_ch, by = "institute_place")
edges_eu <- edges_eu %>%
  anti_join(edges_ch, by = c("from" = "from", "to" = "to")) %>%
  drop_na()

# svg("map_eu.svg", width = 12, height = 7)
par(mar = c(0, 0, 0, 0))
# maps::map(database = "worldHires", 
maps::map(regions = str_c(close_countries, collapse = "|"), 
          lty = 0, fill = TRUE, col = rgb(0, 0, 0, .1), 
          xlim = c(-25, 50), ylim = c(25, 70))

edge_col <- make_ramp(0.5, 0.1)
add_nodes(nodes_eu, ramp = node_col, cex_scale = 1.2)
add_edges(edges_eu, ramp = edge_col, lwd_scale = 1.2)

# invisible(dev.off())

Collaborations with the rest of the world

nodes_io <- make_nodes(core)
edges_io <- make_edges(core)
edges_io <- edges_io %>%
  left_join(select(nodes_io, -size), by = c("from" = "institute_place")) %>%
  rename(x1 = x, y1 = y) %>%
  left_join(select(nodes_io, -size), by = c("to" = "institute_place")) %>%
  rename(x2 = x, y2 = y) %>%
  group_by(from, to) %>%
  summarise(
    x1 = head(x1, 1), y1 = head(y1, 1),
    x2 = head(x2, 1), y2 = head(y2, 1),
    strength = n())

nodes_io <- nodes_io %>%
#   anti_join(nodes_eu, by = "institute_place") %>%
  anti_join(nodes_ch, by = "institute_place")
edges_io <- edges_io %>%
#   anti_join(edges_eu, by = c("from" = "from", "to" = "to")) %>%
  anti_join(edges_ch, by = c("from" = "from", "to" = "to")) %>%
  drop_na()

# svg("map_int.svg", width = 12, height = 7)
par(mar = c(0, 0, 0, 0))
maps::map("world", regions = "(?!Antarctica)", 
          lty = 0, fill = TRUE, col = rgb(0, 0, 0, .1), wrap = TRUE)
# map nodes
edge_col <- make_ramp(0.6, 0.3)
add_nodes(nodes_io, ramp = node_col, cex_scale = .4)
add_edges(edges_io, ramp = edge_col, lwd_scale = .25)

# invisible(dev.off())

Source. Last updated: 2018-02-13.