Skip to contents
library(ggpedigree) # ggPedigree lives here
library(BGmisc) # helper utilities & example data
library(ggplot2) # ggplot2 for plotting
library(viridis) # viridis for color palettes
library(tidyverse) # for data wrangling

Constructing Custom Pedigrees for Publication

Here we demonstrate how to create a custom pedigree using the ggpedigree package. The data shown here were generated using the simulatePedigree() function from the {BGmisc} package, which is the parent package to {ggpedigree}. These simulated pedigrees were used in a study evaluating statistical power and estimation bias for a variance decomposition model that includes mitochondrial DNA (mtDNA) effects.

The simulation generated thousands of extended pedigree structures varying in depth, sibship size, mating structure, and maternal lineage overlap. The example below shows one of the simulated pedigrees and is the version included in the final manuscript:

Detecting mtDNA effects with an Extended Pedigree Model: An Analysis of Statistical Power and Estimation Bias Xuanyu Lyu, S. Alexandra Burt, Michael D. Hunter, Rachel Good, Sarah L. Carroll, S. Mason Garrison Preprint available at: https://doi.org/10.1101/2024.12.19.629449

The structure includes multiple generations, sibling sets, and overlapping parental lineages, and was chosen to illustrate the complexity of the simulated pedigrees used in the power study.

Preparing the data

Each row represents one individual. Variables include personID, momID, dadID, sex, and famID. The proband variable is included to demonstrate status overlays. For plotting, we normalize identifiers in family 1 to avoid ID collisions across families.

Click to expand pedigree setup
library(tibble)
library(dplyr)
pedigree_df <- tribble(
  ~personID, ~momID, ~dadID, ~sex, ~famID,
  10011, NA, NA, 0, 1,
  10012, NA, NA, 1, 1,
  10021, NA, NA, 1, 1,
  10022, 10011, 10012, 1, 1,
  10023, 10011, 10012, 0, 1,
  10024, NA, NA, 0, 1,
  10025, NA, NA, 0, 1,
  10026, 10011, 10012, 0, 1,
  10027, 10011, 10012, 1, 1,
  10031, 10023, 10021, 0, 1,
  10032, 10023, 10021, 1, 1,
  10033, 10023, 10021, 1, 1,
  10034, 10023, 10021, 1, 1,
  10035, 10023, 10021, 0, 1,
  10036, 10024, 10022, 1, 1,
  10037, 10024, 10022, 0, 1,
  10038, 10025, 10027, 1, 1,
  10039, 10025, 10027, 0, 1,
  10310, 10025, 10027, 1, 1,
  10311, 10025, 10027, 1, 1,
  10312, 10025, 10027, 0, 1,
  10011, NA, NA, 0, 2,
  10012, NA, NA, 1, 2,
  10021, NA, NA, 0, 2,
  10022, 10011, 10012, 0, 2,
  10023, 10011, 10012, 1, 2,
  10024, 10011, 10012, 1, 2,
  10025, NA, NA, 1, 2,
  10026, 10011, 10012, 0, 2,
  10027, NA, NA, 1, 2,
  10031, 10021, 10023, 1, 2,
  10032, 10021, 10023, 0, 2,
  10033, 10021, 10023, 1, 2,
  10034, 10022, 10025, 0, 2,
  10035, 10022, 10025, 0, 2,
  10036, 10022, 10025, 1, 2,
  10310, 10022, 10025, 1, 2,
  10037, 10026, 10027, 0, 2,
  10038, 10026, 10027, 0, 2,
  10039, 10026, 10027, 0, 2,
  10311, 10026, 10027, 1, 2,
  10312, 10026, 10027, 1, 2
) %>%
  mutate(
    cleanpersonID = personID - 10000,
    personID = ifelse(famID == 1, personID - 10000, personID),
    momID = ifelse(famID == 1 & !is.na(momID), momID - 10000, momID),
    dadID = ifelse(famID == 1 & !is.na(dadID), dadID - 10000, dadID),
    proband = case_when(
      personID %in% c(11, 22, 23, 26, 27, 31, 32, 33, 34, 35) ~ TRUE,
      personID %in% c(
        10011, 10022, 10022, 10023, 10024, 10026,
        10034, 10035, 10036, 10310,
        10037, 10038, 10039, 10311,
        10312
      ) ~ TRUE,
      TRUE ~ FALSE
    )
  )


df_fig1 <- tribble(
  ~personID, ~momID, ~dadID, ~sex, ~famID,
  10011, NA, NA, 0, 1,
  10012, NA, NA, 1, 1,
  10021, NA, NA, 1, 1,
  10022, 10011, 10012, 1, 1,
  10023, 10011, 10012, 0, 1,
  10024, NA, NA, 0, 1,
  10025, 10011, 10012, 0, 1,
  10027, NA, NA, 1, 1,
  10031, 10023, 10021, 0, 1,
  10032, 10023, 10021, 1, 1,
  10035, 10023, 10021, 0, 1,
  10036, 10024, 10022, 1, 1,
  10037, 10024, 10022, 0, 1,
  10038, 10025, 10027, 1, 1
) %>%
  mutate(
    proband = case_when(
      personID %in% c(10011, 10022, 10023, 10025, 10031, 10032, 10035, 10038) ~ TRUE,
      TRUE ~ FALSE
    ),
    mtdnaline2 = case_when(
      personID %in% c(10024, 10036, 10037) ~ TRUE,
      TRUE ~ FALSE
    ),
  )

Plotting the pedigree

fig1 <- ggPedigree(
  df_fig1,
  famID = "famID",
  personID = "personID",
  status_column = "proband",
  debug = TRUE,
  config = list(
    code_male = 1,
    sex_color_include = FALSE,
    apply_default_scales = FALSE,
    label_method = "geom_text",
    label_col = "personID",
    point_size = 5,
    outline_include = TRUE,
    status_code_affected = TRUE,
    status_code_unaffected = FALSE,
    generation_height = 1,
    generation_width = 1,
    status_affected_shape = 4,
    segment_spouse_color = "black",
    segment_sibling_color = "black",
    segment_parent_color = "black",
    segment_offspring_color = "black",
    outline_multiplier = 1.25,
    segment_linewidth = .5
  )
)
#> Debug mode is ON. Debugging information will be printed.
#> Connections calculated. Number of connections: 14
# fig1

fig1$plot + geom_point(aes(x = x_pos, y = y_pos),
  color = "cornflowerblue", size = 2,
  data = fig1$data %>% dplyr::filter(mtdnaline2 == TRUE)
) +
  scale_shape_manual(
    values = c(16, 15, 14),
    labels = c("Female", "Male", "Unknown")
  ) +
  guides(shape = "none") + scale_color_manual(
    values = c("pink", "white")
  ) +

  # discrete = TRUE,
  # labels = c("TRUE", "FALSE"),
  #  name = ""
  # ) +
  theme(
    strip.text = element_blank(),
    legend.position = "none"
  )

p2 <- ggPedigree(
  pedigree_df,
  famID = "famID",
  personID = "personID",
  status_column = "proband",
  #  debug = TRUE,
  config = list(
    code_male = 1,
    sex_color_include = FALSE,
    apply_default_scales = FALSE,
    label_method = "geom_text",
    label_include = TRUE,
    label_column = "cleanpersonID",
    status_code_affected = TRUE,
    status_code_unaffected = FALSE,
    generation_height = 1,
    generation_width = 1,
    status_affected_shape = 4,
    segment_spouse_color = "black",
    segment_sibling_color = "black",
    segment_parent_color = "black",
    segment_offspring_color = "black"
  )
)

We finish by adjusting the legend and shape scale for visual clarity:

p2 + scale_shape_manual(
  values = c(16, 15, 14),
  labels = c("Female", "Male", "Unknown")
) +
  guides(shape = "none") + scale_color_viridis(
    discrete = TRUE,
    labels = c("TRUE", "FALSE"),
    name = "Founding MtDNA Line"
  ) +
  facet_wrap(~famID, scales = "free", shrink = TRUE) +
  theme(
    strip.text = element_blank(),
    legend.position = "bottom"
  )

More Complex Pedigree Plots with ggPedigree

In this section, we demonstrate how to create a more complex pedigree plot with multiple families. We use the inbreeding dataset from the BGmisc package, which contains several multigenerational pedigrees with consanguinity. Note that in these plots that some individuals may appear in multiple places within the pedigree. This is common in large pedigrees, especially when there are overlapping generations or multiple marriages. Here the colors are set to be the same for all segments, except for self-loops, which are colored purple.

library(BGmisc) # helper utilities & example data

data("inbreeding")


df <- inbreeding # multigenerational pedigree with consanguinity


# df  <- dplyr::filter(df, famID %in% c(5, 7))


p <- ggPedigree(
  df,
  famID = "famID",
  personID = "ID",
  status_column = "proband",
  #  debug = TRUE,
  config = list(
    code_male = 0,
    sex_color_include = FALSE,
    status_code_affected = TRUE,
    status_code_unaffected = FALSE,
    generation_height = 1,
    point_size = 2,
    generation_width = 1,
    status_affected_shape = 4,
    segment_self_color = "purple",
    segment_self_linewidth = .5
  )
)

p + facet_wrap(~famID, scales = "free") #+ scale_color_viridis(

#   discrete = TRUE,
#   labels = c("TRUE", "FALSE")
#  )  + theme_bw(base_size = 14)  +  guides(colour="none", shape="none")

Potter Pedigree Plots

These figures are used in the manuscript to illustrate the use of the ggpedigree package for plotting complex pedigrees. The first figure shows a simplified version of the Potter family pedigree, while the second figure includes additional family members and relationships.

library(BGmisc)
library(ggpedigree)
library(tidyverse)
library(patchwork) # for combining plots
data("potter") # load the potter pedigree data

df_potter <- potter %>%
  mutate(
    name = case_when(
      personID == 1 ~ "Vernon",
      personID == 2 ~ "Marjorie",
      personID == 3 ~ "Petunia",
      personID == 4 ~ "Lily",
      personID == 5 ~ "James",
      personID == 6 ~ "Dudley",
      personID == 7 ~ "Harry",
      personID == 8 ~ "Ginny",
      personID == 9 ~ "Arthur",
      personID == 10 ~ "Molly",
      personID == 11 ~ "Ron",
      personID == 12 ~ "Fred",
      personID == 13 ~ "George",
      personID == 14 ~ "Percy",
      personID == 15 ~ "Charlie",
      personID == 16 ~ "Bill",
      personID == 17 ~ "Hermione",
      personID == 18 ~ "Fleur",
      personID == 19 ~ "Gabrielle",
      personID == 20 ~ "Audrey",
      personID == 21 ~ "James",
      personID == 22 ~ "Albus",
      personID == 23 ~ "Lily",
      personID == 24 ~ "Rose",
      personID == 25 ~ "Hugo",
      personID == 26 ~ "Victoire",
      personID == 27 ~ "Dominique",
      personID == 28 ~ "Louis",
      personID == 29 ~ "Molly",
      personID == 30 ~ "Lucy",
      personID == 101 ~ "Father",
      personID == 102 ~ "Mother",
      personID == 103 ~ "Father",
      personID == 104 ~ "Mother",
      personID == 105 ~ "Father",
      personID == 106 ~ "Mother"
    )
  )

m1 <- ggPedigree(df_potter %>% filter(personID %in% c(1:7, 101:104)),
  famID = "famID",
  personID = "personID",
  config = list(
    label_include = TRUE,
    label_column = "name",
    point_size = 5,
    # outline_include = TRUE,
    focal_fill_personID = 7,
    outline_multiplier = 1.5,
    segment_linewidth = 0.5,
    label_text_size = 2.5,
    focal_fill_include = TRUE,
    label_nudge_y = -0.30,
    focal_fill_high_color = "#052f60",
    focal_fill_mid_color = "#67aacf",
    focal_fill_low_color = "#FCFDFD",
    focal_fill_force_zero = TRUE,
    label_method = "geom_text",
    focal_fill_na_value = "grey10",
    focal_fill_scale_midpoint = 0.40,
    #   focal_fill_n_breaks = 15,
    focal_fill_component = "additive",
    focal_fill_method = "gradient2",
    focal_fill_legend_title = "Additive\nGenetic\nRelatives \nof Harry",
    sex_legend_show = FALSE,
    # "additive",\
    # label_text_angle = -35,
    sex_color_include = FALSE
  ) # highlight Harry Potter
  # config  = list(segment_mz_color = NA) # color for monozygotic twins
) + guides(shape = "none") + theme(
  plot.title = element_blank(),
  plot.title.position = "plot"
) + coord_cartesian(ylim = c(3.25, 1), clip = "off")

m2 <- ggPedigree(df_potter,
  famID = "famID",
  personID = "personID",
  config = list(
    label_include = TRUE,
    label_column = "name",
    point_size = 5,
    # outline_include = TRUE,
    focal_fill_personID = 7,
    outline_multiplier = 1.5,
    segment_linewidth = 0.5,
    label_text_size = 2.5,
    label_nudge_y = -0.25,
    label_nudge_x = .1,
    focal_fill_include = TRUE,
    focal_fill_high_color = "#052f60",
    focal_fill_mid_color = "#67aacf",
    focal_fill_low_color = "#FCFDFD",
    focal_fill_force_zero = TRUE,
    label_method = "geom_text",
    focal_fill_na_value = "grey10",
    focal_fill_scale_midpoint = 0.40,
    #   focal_fill_n_breaks = 15,
    label_text_angle = -35,
    focal_fill_component = "additive",
    focal_fill_method = "gradient2",
    focal_fill_legend_title = "Additive\nGenetic\nRelatives \nof Harry",
    sex_legend_show = FALSE,
    # "additive",
    sex_color_include = FALSE
  ) # highlight Harry Potter
  # config  = list(segment_mz_color = NA) # color for monozygotic twins
) + theme(
  legend.position = "none",
  plot.title = element_blank(),
  plot.title.position = "plot"
) + coord_cartesian(ylim = c(4.25, 1), clip = "off")
m1 + m2 +
  plot_layout(
    ncol = 1, heights = c(1.1, 2.5),
    guides = "collect", tag_level = "new"
  ) +
  plot_annotation(
    tag_levels = list(c("(a)", "(b)")),
    theme = theme(plot.margin = margin(0, 0, 0, 0))
  ) +
  guides(shape = "none") &
  theme(
    legend.position = "right",
    plot.margin = unit(c(0, 0, 0.0, 0), "lines")
  )