Building a Balanced Portfolio

Building a balanced portfolio is not as easy as it might seem.

library(tidyverse)
library(lubridate)
library(gt)
library(echarts4r)
library(tidyquant)
library(arrow)
library(here)
filter <- dplyr::filter
lag <- dplyr::lag

file_prices <- list.files(here("static", "data"),
           pattern = "ETF prices", full.names = TRUE) %>% max()

prices <- read_csv(file_prices, col_types = cols()) %>% 
      select(ticker, date, adjusted)

file_t_note_rates <- list.files(here("static", "data"),
           pattern = "T Note Rates", full.names = TRUE) %>% max()

t_note_rates <- read_csv(file_t_note_rates, col_types = cols()) %>% 
      select(ticker, date, monthly_rate)

Data

The raw data consists of daily prices for seven ETFs:

  • AGG: U.S. total bond market
  • EEM: Emerging Markets
  • EFA: International equities (excluding the U.S. and Canada)
  • GSG: Commodities
  • IEF: 7-10 Year Treasury
  • IYR: U.S. Real Estate
  • VTI: All U.S. stocks

The monthly returns are then computed and an index starting at 1 is created for each ETF for plotting purposes.

Since each ETF has a different inception date and therefore time span, we will set the time span of the analysis to be that of the span of the ETF with the most recent inception date.

# Get monthly Treasury note rates
rf_monthly <-
      t_note_rates %>%
      select(-ticker) %>%
      complete(date = seq.Date(
            from = min(ymd(date)),
            to = max(ymd(date)),
            by = "day"
      )) %>%
      arrange(date) %>%
      fill(monthly_rate) %>%
      slice(xts::endpoints(date, on = "months"))

monthly_prices <- 
      prices %>%
      filter(!ticker %in% c("VWO", "VNQ", "^IXIC")) %>% 
      complete(date = seq.Date(
            from = min(ymd(date)),
            to = max(ymd(date)),
            by = "day"
      )) %>%
      arrange(ticker, date) %>%
      group_by(ticker) %>%
      fill(adjusted) %>% 
      slice(xts::endpoints(date, on = "months")) %>%
      arrange(date) %>% 
      mutate(ticker = case_when(ticker == "AGG" ~ "us_bonds",
                                ticker == "EEM" ~ "emerging_markets",
                                ticker == "EFA" ~ "global_stocks",
                                ticker == "GSG" ~ "commodities",
                                ticker == "IEF" ~ "treasury_notes",
                                ticker == "IYR" ~ "us_real_estate",
                                ticker == "VTI" ~ "us_stocks",
                                TRUE ~ NA_character_))

start_date <-
  monthly_prices %>%
  group_by(ticker) %>%
  filter(date == min(date)) %>%
  pull(date) %>%
  max()

end_date <-
  monthly_prices %>%
  group_by(ticker) %>%
  filter(date == max(date)) %>%
  pull(date) %>%
  min() %>%
  round_date(unit = "months") %>%
  {
    . - days(1)
  } %>%
  as.character()

returns <-
      monthly_prices %>%
      group_by(ticker) %>%
      mutate(return = adjusted / lag(adjusted) - 1) %>%
      filter(date >= start_date, date <= end_date) %>%
      mutate(index = adjusted / adjusted[1]) %>%
      arrange(ticker, date) %>%
      ungroup() %>% 
      drop_na() %>%
      left_join(rf_monthly %>% rename(Rf = monthly_rate), by = "date")
      

returns %>% 
  janitor::clean_names(case = "title") %>% 
  rename(`Asset Class` = Ticker) %>% 
  mutate(across(c("Adjusted", "Return", "Index"), ~round(.x, 2))) %>% 
  mutate(across("Rf", ~round(.x, 4))) %>% 
  group_by(`Asset Class`) %>% 
  slice_tail(n=2) %>%
  ungroup() %>% 
  gt() %>% 
  tab_header(
    title = "Returns of Asset Classes",
    subtitle = md("*How did each asset class perform?*")
  )
Returns of Asset Classes
How did each asset class perform?
Date Asset Class Adjusted Return Index Rf
2022-04-30 commodities 23.68 0.05 0.47 0.0024
2022-05-31 commodities 25.04 0.06 0.49 0.0024
2022-04-30 emerging_markets 42.02 -0.06 1.81 0.0024
2022-05-31 emerging_markets 42.28 0.01 1.82 0.0024
2022-04-30 global_stocks 67.15 -0.07 1.66 0.0024
2022-05-31 global_stocks 68.49 0.02 1.69 0.0024
2022-04-30 treasury_notes 102.56 -0.04 1.85 0.0024
2022-05-31 treasury_notes 103.19 0.01 1.86 0.0024
2022-04-30 us_bonds 102.50 -0.04 1.68 0.0024
2022-05-31 us_bonds 103.28 0.01 1.70 0.0024
2022-04-30 us_real_estate 103.38 -0.04 2.58 0.0024
2022-05-31 us_real_estate 98.79 -0.04 2.47 0.0024
2022-04-30 us_stocks 206.88 -0.09 4.42 0.0024
2022-05-31 us_stocks 206.36 0.00 4.41 0.0024
returns_xts <-
      returns %>% 
      select(date, ticker, return) %>% 
      split(.$ticker) %>% 
      map(~xts(x = .x$return, order.by = .x$date)) %>% 
      reduce(merge) %>% 
      na.omit() %>%
      setNames(returns %>% distinct(ticker) %>% pull())

# Calculate ther period for the returns data
time_period <- (as.Date(max(returns$date)) - as.Date(min(returns$date))) %>% as.numeric()
   
tickers <- returns %>% distinct(ticker) %>% pull()

The data consists of tables with columns for dates, ticker, monthly return, and index value. The index value starts at 1 for all tickers and grows according to each asset’s monthly return.

The data spans 15 years and 10 months.

Asset Performance

How did each asset perform individually?

returns %>%
      group_by(ticker) %>%
      e_chart(x = date) %>%
      e_line(serie = index,
             symbol = 'emptyCircle',
             symbolSize = 1) %>%
      e_tooltip(trigger = "axis", backgroundColor = 'white') %>%
      e_grid(right = "15%", top = "25%") %>%
      e_title(text = "Growth of Select ETFs",
              subtext = "How did each asset class perform?",
              # left = "center",
              left = "3%",
              top = "3%") %>%
      e_legend(orient = "horizontal",
               # type = "scroll",
               top = "15%",
               right = "5",
      ) %>%
      e_datazoom(type = "slider",
                 start = 0,
                 end = 100) %>%
      e_theme("shine") %>%
      e_toolbox_feature("dataZoom", selector = list(start = '20',
                                                    end = '80')) %>%
      e_toolbox_feature(feature = "reset") %>%
      e_toolbox_feature("dataView") %>%
      e_toolbox_feature("saveAsImage") %>%
      e_legend(selector = list(
            list(type = 'inverse', title = 'Invert'),
            list(type = 'all', title = 'Reset')
      ))

Over the period, there were notable performance divergences: - Commodities had disappointing performance continuously losing value over the period, starting at 1 and slowly declining to about 0.5. - Emerging markets had unstable growth, with an index starting at a value of 1 and eventually reaching a value of about 1.8. - Global stocks also had unstable growth, with an index starting at a value of 1 and reaching a value of about 1.6. - The Treasury note portfolio had stable growth, starting at a value of 1 and ending at a value of about 1.85, outperforming commodities, emerging markets, and global stocks. - U.S. bonds also experienced modest, stable growth starting with an index value of 1 and reaching a value of about 1.68, slightly underperforming the Treasury note ETF. - U.S. real estate experienced high volatility at the beginning and end of the period, but overall grew from a value of 1 to a value of about 2.6, outperforming all previously mentioned asset classes. - U.S. stocks experienced a mostly stable growth, with the index starting at a value of 1 to a value of about 4.42. There was modest volatility during the period.

Based solely on total return, it would have been better to have invested all one’s money in the U.S. stock market over the period. Of course, this could not have been known prior to the period, so investors often make more diversified portfolios in order to spread out their risks.

Return Profiles

returns %>% 
  mutate(ticker = case_when(ticker == "commodities" ~ "Commodities",
                            ticker == "emerging_markets" ~ "Emerging Markets",
                            ticker == "global_stocks" ~ "Global Stocks",
                            ticker == "treasury_notes" ~ "Treasury Notes",
                            ticker == "us_real_estate" ~ "U.S. Real Estate",
                            ticker == "us_bonds" ~ "U.S. Bonds",
                            ticker == "us_stocks" ~ "U.S. Stocks",
                            TRUE ~ NA_character_)) %>% 
      ggplot(aes(x = ticker, y = return, color = ticker)) +
      geom_violin() +
      geom_jitter(width = 0.15, alpha = 0.8) +
      scale_y_continuous(labels = scales::percent) +
      theme_minimal() +
      labs(title = "Distribution of Returns Asset Classes",
           subtitle = paste("For the ", time_period %/% 365, "years ended ", 
                            format(as.Date(end_date), "%B %Y")),
           x = "", y = "Return") +
      theme(legend.position = "none",
            plot.title = element_text(face = "bold", size = 13),
            plot.title.position = "plot")

Commodities, Emerging markets, global stocks, U.S. real estate, and U.S. stocks experienced about the same volatility during the period. U.S. Treasury returns and U.S. bonds experienced low volatility of returns over the period.

up_days <-
      returns_xts %>% 
      as_tibble() %>% 
      {. > 0} %>% 
      colSums()

down_days <-
      returns_xts %>% 
      as_tibble() %>% 
      {. < 0} %>% 
      colSums()

perc_up_days <- 
      {up_days / (up_days + down_days)} %>% 
      tibble(ticker = names(.), perc_up = round(., 2)) %>% 
      arrange(desc(perc_up))

perc_up_days %>% 
  mutate(ticker = case_when(ticker == "commodities" ~ "Commodities",
                            ticker == "emerging_markets" ~ "Emerging Markets",
                            ticker == "global_stocks" ~ "Global Stocks",
                            ticker == "treasury_notes" ~ "Treasury Notes",
                            ticker == "us_real_estate" ~ "U.S. Real Estate",
                            ticker == "us_bonds" ~ "U.S. Bonds",
                            ticker == "us_stocks" ~ "U.S. Stocks",
                            TRUE ~ NA_character_)) %>% 
  mutate(perc_up = paste0(100*perc_up, "%")) %>% 
  select(`Asset Class` = ticker, `% Up` = perc_up) %>% 
  gt() %>% 
  cols_align(
    align = "center",
    columns = `% Up`
  ) %>% 
  tab_header(
    title = "How often did the price go up?",
    subtitle = md("*(over the 15 year period)?*")
  )
How often did the price go up?
(over the 15 year period)?
Asset Class % Up
U.S. Stocks 67%
U.S. Real Estate 62%
U.S. Bonds 60%
Global Stocks 58%
Commodities 54%
Treasury Notes 54%
Emerging Markets 52%

Gains vs. Losses

It turns out that U.S. stocks, U.S. real estate, and U.S. bonds had positive return months at least 60% of the time over the period.

Correlations

corr <- returns_xts %>% cor() %>% round(2)
      
ggcorrplot::ggcorrplot(
      corr,
      outline.col = "white",
      type = "lower",
      hc.order = TRUE,
      lab = TRUE
) +
      labs(title = "Correlations between ETF returns",
           subtitle = "Which ETFs tended to move together?") +
      theme(plot.title = element_text(face = "bold", size = 13),
            plot.title.position = "plot")

Over the period, there were several notable return patterns: - Commodity returns were moderately correlated with emerging market stocks, global stocks, and U.S. stocks - Emerging market returns were strongly correlated with global stocks and U.S. stocks - Global stocks were highly correlated with U.S. stocks - Treasury returns were highly correlated with U.S. bonds - Treasury returns were moderately negatively correlated with commodities and U.S. stocks indicating there are potential diversification benefits from including Treasury ETFs in the portfolio containing stocks

Weights

One decision one has to make is how much of each asset to add to the portfolio. There are many ways to do this and one popular way is to use mean-variance optimization to determine which portfolio (out of a set of portfolios with varying asset weights) achieved the highest return for each unit of risk. Here, risk is measured as the standard deviation of such returns.

Set Benchmark

We will assume that investing equally in all assets is a reasonable benchmark for this analysis.

equal_wts <- rep(1/length(tickers), length(tickers))

wts_list <- list()
length(wts_list) <- length(tickers)
wts_grid <- map(wts_list, ~seq(0, 1, 0.1))

weights_grid <- tibble(expand.grid(
      wts_grid
      )) %>% 
      filter(rowSums(.) == 1)

n_portfolios <- nrow(weights_grid)

weights <- weights_grid %>% transpose() %>% unlist(use.names = FALSE)

ticker_returns_grid <-
      returns %>% 
      select(date, ticker, return) %>% 
      tq_repeat_df(n = n_portfolios)

weights_table <-
      tibble(tickers) %>%
      tq_repeat_df(n = n_portfolios) %>%
      bind_cols(tibble(weights)) %>%
      full_join(tibble(
            portfolio = 0,
            tickers = tickers,
            weights = equal_wts
      )) %>%
      rename(ticker =  tickers, weight = weights)

wts_plot <- 
      weights_table %>%
            filter(portfolio %in% seq(0, 7500, by = 250)) %>%
            ggplot(aes(x = portfolio, y = weight, fill = ticker)) +
            geom_col(alpha = 0.8) +
            scale_y_continuous(labels = scales::percent, breaks = seq(0, 1, by = .1)) +
            theme_minimal() +
            labs(
                  title = "Weights for portfolio assets",
                  subtitle = "What are the possible ways to combine assets in a portfolio",
                  x = "portfolio #",
                  y = "",
                  fill = "Ticker"
            ) +
            theme(plot.title = element_text(face = "bold", size = 13),
                  plot.title.position = "plot")

plotly::ggplotly(wts_plot)

There happen to be thousands of possible portfolio constructions. So, which one yielded the best performance over the period?

Performance stats

# Get portfolio returns
# port_returns_grid <- 
#   ticker_returns_grid %>% 
#   left_join(weights_table) %>% 
#   mutate(wtd_rtn = weight * return) %>% 
#   group_by(portfolio, date) %>% 
#   summarize(port_return = sum(wtd_rtn))

# write_feather(port_returns_grid, "static/data/port_returns_grid")
port_returns_grid <- read_feather(here("static", "data", "port_returns_grid")) %>% ungroup()


bm_returns <-
  returns %>%
  left_join(tibble(weight = equal_wts, ticker = tickers)) %>% 
  select(date, ticker, return, weight) %>% 
  mutate(wtd_rtn = weight * return) %>% 
  group_by(date) %>% 
  summarize(bm_return = sum(wtd_rtn)) 

bm_returns_xts <- 
  bm_returns %>%
  xts(x = .$bm_return, order.by = .$date) %>% 
  setNames("bm_return")

risk_free <- 
      rf_monthly %>% 
      filter(date >= start_date, date <= end_date) %>% 
      select(date, risk_free = monthly_rate)

risk_free_xts <-
      risk_free %>% 
      xts(x = .$risk_free, order.by = .$date) %>% 
      setNames("risk_free")

cleaned_returns_list <-
      port_returns_grid %>%
      filter(portfolio != 0) %>%
      split(., .$portfolio)

cleaned_returns_list_xts <-
      cleaned_returns_list %>% 
      map(~xts(.x$port_return, order.by = .x$date))

# returns_xts <- reduce(cleaned_returns_list_xts, merge)
# saveRDS(returns_xts, "static/data/returns_xts.rds")
returns_xts <- readRDS(here("static", "data", "returns_xts.rds"))
# capm_stats <- table.CAPM(Ra = returns_xts, Rb = bm_returns_xts, Rf = risk_free_xts) 

# capm_stats_cleaned <- 
#   capm_stats %>% 
#   rownames_to_column(var = "measure") %>%
#   as_tibble() %>%
#   set_names(c("measure", paste0("port_", seq_along(.)[-ncol(.)])))

# saveRDS(capm_stats_cleaned, "static/data/capm_stats.rds")
capm_stats <- readRDS(here("static", "data", "capm_stats.rds"))
      

# port_performance <- table.Stats(R = returns_xts)
#       
# port_performance_cleaned <-
#   port_performance %>%
#   rownames_to_column(var = "measure") %>%
#   as_tibble() %>%
#   set_names(c("measure", paste0("port_", seq_along(.)[-ncol(.)])))

# saveRDS(port_performance_cleaned, "static/data/port_performance.rds")
port_performance <- readRDS(here("static", "data", "port_performance.rds"))


port_performance %>% 
  distinct(measure) %>% 
  mutate(Description = c(
    "Number of observations",
    "Number of NA (not available) values",
    "Global minimum",
    "First quartile value (25th percentile)",
    "Median (50th percentile)",
    "Arithmatic average",
    "Average when compounding is applied",
    "Third quartile value (75th percentile)",
    "Global maximum",
    "Standard error of the mean (standard deviation of the means from repeated sampling of the population)",
    "Lower confidence limit (95th percentile) (lower limit for true mean value at a 10% confidence level)",
    "Upper confidence limit (95th percentil) (upper limit for true mean value at a 10% confidence level)",
    "Variance (measure of dispersion)",
    "Standard deviation (normalized measure of disperson)",
    "Skewness (measure of assymetry)",
    "Kurtosis (measure of concentration)"
  )) %>% 
  gt(rowname_col = "measure") %>% 
  tab_stubhead(label = "Measure") %>% 
  tab_header(
    title = md("**Description of Performance Indicators**")
  )
Description of Performance Indicators
Measure Description
Observations Number of observations
NAs Number of NA (not available) values
Minimum Global minimum
Quartile 1 First quartile value (25th percentile)
Median Median (50th percentile)
Arithmetic Mean Arithmatic average
Geometric Mean Average when compounding is applied
Quartile 3 Third quartile value (75th percentile)
Maximum Global maximum
SE Mean Standard error of the mean (standard deviation of the means from repeated sampling of the population)
LCL Mean (0.95) Lower confidence limit (95th percentile) (lower limit for true mean value at a 10% confidence level)
UCL Mean (0.95) Upper confidence limit (95th percentil) (upper limit for true mean value at a 10% confidence level)
Variance Variance (measure of dispersion)
Stdev Standard deviation (normalized measure of disperson)
Skewness Skewness (measure of assymetry)
Kurtosis Kurtosis (measure of concentration)
port_performance <-
  port_performance %>%
  pivot_longer(-measure) %>% 
  filter(measure %in% c("Geometric Mean", "Stdev", "Skewness")) %>% 
  pivot_wider(names_from = "measure") %>% 
  janitor::clean_names() %>% 
  rename(portfolio = name) %>% 
  mutate(portfolio = as.numeric(str_extract(portfolio, "\\d+"))) %>% 
  mutate(annual_return = geometric_mean * 12) %>%
  mutate(annual_stdev = stdev*sqrt(12)) %>%
  mutate(sharpe_ratio = ((annual_return - 0.03) / annual_stdev))

Portfolios with Highest Returns

What portfolios experienced the highest returns over the period?

best_return <-
  port_performance %>%
  arrange(desc(annual_return),
          desc(sharpe_ratio),
          stdev,
          desc(skewness)) %>%
  select(portfolio, sharpe_ratio, geometric_mean, skewness) %>%
  slice(1:10) %>%
  pull(portfolio)

weights_table %>%
  filter(portfolio %in% best_return) %>%
  ggplot(aes(x = ticker, y = weight, fill = ticker)) +
  geom_col() +
  geom_text(aes(label = scales::percent_format()(weight)),
            nudge_y = 0.07,
            color = "darkgray") +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(. ~ paste("Portfolio: ", portfolio)) +
  labs(
    x = "",
    y = "",
    title = "Portfolio Weights",
    subtitle = "Weights for portfolios with best returns",
    fill = "Asset"
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_blank(),
    plot.title = element_text(face = "bold", size = 13),
    legend.background = element_rect(
      fill = "white",
      size = 4,
      colour = "white"
    ),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title.position = "plot",
    axis.text.x = element_blank()
  )

It’s no surprise that the top 10 portfolios based on total return were comprised almost entirely of U.S. stocks which significantly outperformed the other asset classes over the investment period. So, if we had invested almost all of our cash in U.S. stocks, we would have outperformed other asset classes. However, at the same time, we need to account for risk (we’re using standard deviation as as a proxy for volatility/risk). Let’s look at volatility and how it should be considered in the investment decision.

Portfolio Volatility

Let’s rank the portfolios in terms of return and volatility. For returns, a rank one will be assigned to the portfolio with the highest return. For volatility, a rank of one will be assigned to the portfolio with the lowest standard deviation.

port_performance %>% 
  # arrange(desc(annual_stdev)) %>% 
  select(portfolio, geometric_mean, annual_stdev) %>% 
  # mutate(return_rank = row_number(-geometric_mean)) %>% 
  # mutate(stdev_rank = row_number(annual_stdev)) %>% 
  # mutate(across(annual_stdev, ~round(.x, 2))) %>% 
  mutate(return_percentile = 100*round(percent_rank(geometric_mean), 3)) %>%
  mutate(sd_percentile = 100*round(percent_rank(annual_stdev), 2)) %>%
      select(-geometric_mean, -annual_stdev) %>% 
  filter(portfolio %in% best_return) %>% 
  arrange(desc(return_percentile)) %>% 
  rename(`Portolio #` = portfolio, `Return Percentile` = return_percentile,
         `Stdev. Percentile` = sd_percentile) %>% 
  gt() %>% 
  tab_header(
    title = "What was the volatility of of the portfolios with high returns?",
    subtitle = md("*(over the 15 year period)*")
  ) %>% 
  tab_footnote(
    footnote = "A percentile of 100 is assigned to the portfolio with the highest return.",
    locations = cells_column_labels(columns = c(`Return Percentile`))
  ) %>% 
   tab_footnote(
    footnote = "A percentile of 100 is assigned to the portfolio with the lowest standard deviation.",
    locations = cells_column_labels(columns = c(`Stdev. Percentile`))
  )
What was the volatility of of the portfolios with high returns?
(over the 15 year period)
Portolio # Return Percentile1 Stdev. Percentile2
7657 100.0 79
7658 100.0 77
7647 99.9 80
7649 99.9 61
7650 99.9 63
7651 99.9 81
7653 99.9 79
7654 99.9 77
7655 99.9 59
7656 99.9 61
1 A percentile of 100 is assigned to the portfolio with the highest return.
2 A percentile of 100 is assigned to the portfolio with the lowest standard deviation.

Of the 10 portfolios with the highest returns, all had relatively high volatility. The standard deviation ranks for all top 10 performing portfolios are greater than 4500, meaning that at least 4500 other portfolios had lower volatility than each of these high return portfolios. So, what should we have done?

Investment Efficiency

Of course, every investor tries to maximize the total return on investments. However, higher returns usually come with higher volatility. Investors want high returns and low volatility. So, how can we maximize our returns for each unit of volatility?

best_sharpe <-
  port_performance %>% 
  arrange(desc(sharpe_ratio),
          desc(geometric_mean),
          stdev,
          desc(skewness)) %>%
  select(portfolio, sharpe_ratio, geometric_mean, skewness) %>%
  slice(1:10) %>%
  pull(portfolio)


weights_table %>%
  filter(portfolio %in% best_sharpe) %>%
  ggplot(aes(x = ticker, y = weight, fill = ticker)) +
  geom_col() +
  geom_text(aes(label = scales::percent_format()(weight)),
            nudge_y = 0.07,
            color = "darkgray") +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(. ~ paste("Portfolio: ", portfolio)) +
  labs(
    x = "",
    y = "",
    title = "Best risk-adjusted returns",
    subtitle = "Weights for portfolios with best sharpe_ratios (best return for each unit of risk)",
    fill = "Ticker"
  ) +
  theme_bw() +
  theme(
    axis.text.y = element_blank(),
    plot.title = element_text(face = "bold", size = 13),
    legend.background = element_rect(
      fill = "white",
      size = 4,
      colour = "white"
    ),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title.position = "plot",
    axis.text.x = element_blank()
  )

It turns out the efficient portfolios (i.e., portfolios that maximize return to risk) consisted of a mix of Treasury funds and U.S. stocks, and sometimes U.S. bonds. What does this reveal? It seems that a mix of higher returns (U.S. stocks) and low volatility (Treasury funds and U.S. bonds) resulted in the best balance of return and risk. One option for an investor in this case is to mix these three assets, usually mostly comprised of Treasury funds, followed by U.S. stocks, and then U.S. bonds (e.g., 50%, 40%, 10%, respectively for portfolio 6904).

How did the best Sharpe ratio (efficient) portfolios perform based on total returns?

port_performance %>%
  arrange(desc(annual_return),
          desc(sharpe_ratio),
          stdev,
          desc(skewness)) %>% 
  mutate(return_percentile = 100*round(percent_rank(annual_return), 2)) %>% 
  mutate(annual_return = round(annual_return, 3)) %>% 
  filter(portfolio %in% best_sharpe) %>% 
  select(`Portfolio #` = portfolio, `Annual Return` = annual_return, `Return Percentile` = return_percentile) %>% 
  gt() %>% 
  tab_header(
    title = "How did the portfolios with the best Sharpe ratios perform based on just return?",
    subtitle = md("*(over the 15 year period)*")
  ) %>% 
  tab_footnote(
    footnote = "A percentile of 100 is assigned to the portfolio with the highest return.",
    locations = cells_column_labels(columns = c(`Return Percentile`))
  )
How did the portfolios with the best Sharpe ratios perform based on just return?
(over the 15 year period)
Portfolio # Annual Return Return Percentile1
7501 0.077 99
7282 0.072 97
7317 0.071 96
6848 0.066 93
6904 0.065 92
6939 0.065 92
6959 0.064 90
6158 0.060 85
6288 0.059 84
6311 0.058 81
1 A percentile of 100 is assigned to the portfolio with the highest return.

Overall, each of the top 10 efficient portfolios were in the top 20th percentile in terms of total return. So, it seems like a balanced, more efficient portfolio can minimize volatility for each level of return and at the same time not sacrifice too much in terms of total return.

Summary

Over the last 15 years and 10 months it would have been generally better to invest almost all of one’s cash into the overall U.S. stock market if one did not care about risk. However, since investors do care about risk, it turns out it would have been better to combine a mix of Treasury funds, U.S. bonds, and U.S. stocks to experience a more efficient and balanced portfolio while still experiencing high returns.

Limitations

This analysis spans a relatively small time period. It is not a good idea to make future investment decisions based on research using such short data-spans. One approach to the data limitation problem is to run the analysis on indexes (not ETFs) with longer data histories and then utilize ETFs as proxies to represent the uninvestible indexes. This may be appropriate if the ETFs closely track the indexes they are purported to track.

Aaron Hardy
Aaron Hardy
Data analyst, programmer, and investor

My research interests include programming, finance, dashboards, and algorithmic modeling.