[This article was first published on DataGeeek, and kindly contributed to R-bloggers]. (You can report issue about the content on this page here)
Want to share your content on R-bloggers? click here if you have a blog, or here if you don’t.
Charles Schwab analysts said that historically, budget deficits have had minimal impact on Treasury yields, primarily due to the United States’ economic dominance and its status as the issuer of the world’s reserve currency.
The variable importance analysis with the XGBoost machine learning model confirms the aforementioned statement.

Source code:
library(tidyverse) library(tidymodels) library(tidyquant) #30-year Treasury yield (^TYX) df_yield_30 <- tq_get("^TYX") %>% tq_transmute(select = close, mutate_fun = to.monthly, col_rename = "yield_30") %>% mutate(date = as.Date(date)) #Federal Surplus or Deficit [-] (MTSDS133FMS) df_deficit <- tq_get("MTSDS133FMS", get = "economic.data") %>% select(date, deficit = price) #Merging the datasets df_merged <- df_yield_30 %>% left_join(df_deficit) %>% drop_na() #Data split splits <- initial_time_split(df_merged, prop = 0.8) df_train <- training(splits) df_test <- testing(splits) #Bootstrapping for tuning set.seed(12345) df_folds <- bootstraps(df_train, times = 100) #Model model_spec <- boost_tree(trees = tune(), learn_rate = tune()) %>% set_engine("xgboost") %>% set_mode("regression") #Preprocessing recipe_spec <- recipe(yield_30 ~ ., data = df_train) %>% step_date(date, features = "month", ordinal = FALSE) %>% step_dummy(all_nominal_predictors(), one_hot = TRUE) %>% step_mutate(date_num = as.numeric(date)) %>% step_normalize(all_numeric_predictors()) %>% step_rm(date) #Workflow sets wflow_xgboost <- workflow_set( preproc = list(recipe = recipe_spec), models = list(model = model_spec) ) #Tuning and evaluating all the models grid_ctrl <- control_grid( save_pred = TRUE, parallel_over = "everything", save_workflow = TRUE ) grid_results <- wflow_xgboost %>% workflow_map( seed = 98765, resamples = df_folds, grid = 10, control = grid_ctrl ) #Accuracy of the grid results grid_results %>% rank_results(select_best = TRUE, rank_metric = "rsq") %>% select(Models = wflow_id, .metric, mean) #Finalizing the model with the best parameters best_param <- grid_results %>% extract_workflow_set_result("recipe_model") %>% select_best(metric = "rsq") wflw_fit <- grid_results %>% extract_workflow("recipe_model") %>% finalize_workflow(best_param) %>% fit(df_train) #Variable importance library(DALEXtra) #Fitted workflow for KNN set.seed(98765) knn_wflow_fitted <- workflow() %>% add_recipe(rec_features) %>% add_model(spec_knn) %>% fit(df_train) #Processed data frame for variable importance calculation imp_data <- recipe_spec %>% prep() %>% bake(new_data = NULL) #Explainer object explainer_xgboost <- explain_tidymodels( wflw_fit %>% extract_fit_parsnip(), data = imp_data %>% select(-yield_30), y = imp_data$yield_30, label = "", verbose = FALSE ) #Calculating permutation-based variable importance set.seed(1983) vip_xgboost <- model_parts(explainer_xgboost, loss_function = loss_root_mean_square, type = "difference", B = 100,#the number of permutations label = "") #Plot VIP vip_xgboost %>% plot() + labs(color = "", x = "", y = "", subtitle = "Higher indicates more important", title = "Factors Affecting 30-year Treasury Yield") + theme_minimal(base_family = "Roboto Slab", base_size = 16) + theme(legend.position = "none", plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), plot.subtitle = element_text(hjust = 0.5, size = 12), panel.grid.minor.x = element_blank(), panel.grid.major.y = element_blank(), plot.background = element_rect(fill = "azure"))
Related