```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(haven)
library(dplyr)
library(Hmisc)
library(ggplot2)
library(psych)
library(margins)
library(effects)
library(marginaleffects)
library(lmtest)
library(sandwich)
```

load data

```{r}
 saf<- read_dta("Data Files/ps2030.safrica.dta")
```

recode variable

```{r}
saf <- mutate(saf, locdich = ifelse(locpart == 0, 0, 1))

# Label the variable
label(saf$locdich) <- "Participated in Any Local Activity"

table(saf$locdich)
```

scatter plot

```{r}
# Scatter plot with jitter and linear fit
ggplot(saf, aes(x = educ1, y = locdich)) +
  geom_jitter(width = 0.5, height = 0.1) +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(title = "Scatter plot of locdich and educ1",
       x = "educ1",
       y = "locdich")+
  scale_y_continuous(breaks = seq(0,1,0.2))
```

linear model

```{r}
lm_loc<-lm(locdich~ educ1, data=saf)
summary(lm_loc)
```

prediction

```{r}
saf$lpmpred<-lm_loc$fitted.values
describe(saf$lpmpred[saf$educ1 == 1])
describe(saf$lpmpred[saf$educ1 == 3])
describe(saf$lpmpred[saf$educ1 == 5])
```

margins

```{r}
avg_predictions(lm_loc,variables = list(educ1 = seq(1,7, by=2)))

```

Group by educ1 and calculate mean for lpmpred and locdich

```{r}
m <- saf %>%
  group_by(educ1) %>%
  summarise(mean_lpmpred = mean(lpmpred),
            mean_locdich = mean(locdich))
print(m)
```

```{r}
saf$lpmres<-lm_loc$residuals
```

SHOW NON-NORMALITY AND HETEROSKEDASTICITY

```{r}
# rvfplot with jitter(20) and yline(0)
ggplot(saf, aes(x = lpmpred, y = lpmres)) +
  geom_jitter(width = 0.1, height = 0.2) +
  geom_hline(yintercept = 0, linetype = "solid", color = "red") +
  labs(title = "Residuals vs Fitted Values",
       x = "Fitted Values",
       y = "Residuals")
```

histogram

```{r}
# hist of lpmres
ggplot(saf, aes(x = lpmres)) +
  geom_histogram(bins = 29, breaks = seq(-0.97611231, 0.7, by = .05427739), fill = "yellow", color = "black", alpha = 0.7, aes(y = ..density..)) +
  labs(title = "Histogram of Fitted Values", x = "Residuals", y = "Density") 
```

SHOW OUT-OF-BOUNDS PREDICTIONS

```{r}
lm_loc2<-lm(locdich~ educ1 + groups + know + times, data=saf)
summary(lm_loc2)
saf$lpmpred2<-lm_loc2$fitted.values
saf$lpmpred2[saf$lpmpred2 <= 0 | saf$lpmpred2 >= 1]
```

##GOLDBERGER LPM MODEL WITH WEIGHTED LEAST SQUARES

```{r}
# Generate lpmpred3 as lpmpred2
saf <- mutate(saf, lpmpred3 = lpmpred2)

# Replace values in lpmpred3
saf$lpmpred3[saf$lpmpred2 < 0] <- 0.001
saf$lpmpred3[saf$lpmpred2 > 1] <- 0.995
# Generate wt
saf <- mutate(saf, wt = 1 / (lpmpred3 * (1 - lpmpred3)))

# Regression
lm_loc_w <- lm(locdich ~ educ1 + groups + know + times, data = saf, weights = wt)
summary(lm_loc_w)

# Generate lpmpred4
saf <- mutate(saf, lpmpred4 = coef(lm_loc_w)[1] + coef(lm_loc_w)[2] * educ1 + 
                             coef(lm_loc_w)[3] * groups + coef(lm_loc_w)[4] * know + 
                             coef(lm_loc_w)[5] * times)

cor(saf$lpmpred4, saf$locdich)
```

0.4073\^2 is the R-SQUARED IN THE ORIGINAL UNITS OF LOCAL, NOT THE "WEIGHTED" UNITS AS IN THE WLS MODEL

## Logit

```{r}
# Logistic regression
lg_loc <- glm(locdich ~ times, data = saf, family = binomial)
summary(lg_loc)

# Predicted probabilities
saf$plogit  <- predict(lg_loc, type = "response")

# Predicted logits (index function xb)
saf$logodds <- predict(lg_loc, type = "link")

# Generate odds
saf <- within(saf, {
  odds1 <- exp(logodds)
})

# Simulate probability of 1 for a person at times=0
prob_times_0 <- exp(coef(lg_loc)[1] + coef(lg_loc)[2] * 0) / (1 + exp(coef(lg_loc)[1] + coef(lg_loc)[2] * 0))
cat("Simulated probability of 1 for a person at times=0:", prob_times_0, "\n")

# Simulate probability of 1 for a person at times=4
prob_times_4 <- exp(coef(lg_loc)[1] + coef(lg_loc)[2] * 4) / (1 + exp(coef(lg_loc)[1] + coef(lg_loc)[2] * 4))
cat("Simulated probability of 1 for a person at times=4:", prob_times_4, "\n")

summary_table <- saf %>%
  group_by(times) %>%
  summarise(mean_plogit = mean(plogit),
            mean_odds1 = mean(odds1),
            mean_logodds = mean(logodds))

# Display the summary table
print(summary_table)
```

margins:

```{r}

preloc_lg<-avg_predictions(lg_loc, type="response", variables = list(times = seq(0, 4, by = 1)))


# plot with no points
ggplot(as.data.frame(preloc_lg), aes(x = times, y = estimate)) +
  geom_point()+
  geom_line() +
  geom_errorbar(aes(ymin = estimate - 2*std.error, ymax = estimate + 2*std.error), alpha = 0.5) +
  labs(title = "Adjusted predictions",
       x = "Times", y = "Pr(locdich)")+
scale_x_continuous(breaks = seq(0,4,1)) +
  scale_y_continuous(breaks = seq(0.4,0.9,0.1))
```

```{r}
# Sort the data by 'times'
saf <- saf[order(saf$times), ]

# Plot connected line
ggplot(saf, aes(x = times, y = plogit)) +
  geom_point()+
  geom_line() +
  labs(title = "Connected Line Plot of Predicted Probabilities against times",
       x = "times",
       y = "Predicted Probability")
```

WHAT IF INTERCEPT CHANGED FROM -.07 TO -1.0??

```{r}
# Calculate predicted probabilities with a different intercept
intercept_new <- -1.0
saf$plogit1 <- exp(intercept_new + coef(lg_loc)[2] * saf$times) / (1 + exp(intercept_new + coef(lg_loc)[2] * saf$times))

# table 
table_p1 <- saf %>%
  group_by(times) %>%
  summarise(mean_plogit = mean(plogit),
            mean_plogit1 = mean(plogit1))
table_p1

# Plot connected lines
ggplot(saf, aes(x = times)) +
  geom_point(aes(y = plogit), color = "blue", size = 1)+
  geom_line(aes(y = plogit), color = "blue", linetype = "solid", size = 0.5) +
  geom_point(aes(y = plogit1), color = "red", size = 1) +
  geom_line(aes(y = plogit1), color = "red", linetype = "solid", size = 0.5) +
  labs(title = "Comparison of Predicted Probabilities with Different Intercepts",
       x = "times",
       y = "Predicted Probability") +
  scale_linetype_manual(values = c("solid", "solid"), labels = c("Original Intercept", "Intercept: -1"))
```

WHAT IF INTERCEPT CHANGED FROM -.07 TO 1??

```{r}
# Calculate predicted probabilities with a different intercept
intercept_new <- 1
saf$plogit2 <- exp(intercept_new + coef(lg_loc)[2] * saf$times) / (1 + exp(intercept_new + coef(lg_loc)[2] * saf$times))

## table 
table_p2 <- saf %>%
  group_by(times) %>%
  summarise(mean_plogit = mean(plogit),
            mean_plogit1 = mean(plogit1),
            mean_plogit2 = mean(plogit2))
table_p2

# Plot connected lines
ggplot(saf, aes(x = times)) +
  geom_point(aes(y = plogit), color = "blue", size = 1)+
  geom_line(aes(y = plogit), color = "blue", linetype = "solid", size = 0.5) +
  geom_point(aes(y = plogit1), color = "red", size = 1) +
  geom_line(aes(y = plogit1), color = "red", linetype = "solid", size = 0.5) + 
    geom_point(aes(y = plogit2), color = "green", size = 1) +
  geom_line(aes(y = plogit2), color = "green", linetype = "solid", size = 0.5) +
  labs(title = "Comparison of Predicted Probabilities with Different Intercepts",
       x = "times",
       y = "Predicted Probability") +
  scale_linetype_manual(values = c("solid", "solid", "solid"), labels = c("Original Intercept", "Intercept: -1", "Intercept: 1"))
```

WHAT IF SLOPE CHANGED FROM .47 to .10?

```{r}
# Calculate predicted probabilities with a different slope
saf$plogit3 <- exp(coef(lg_loc)[1]+ 0.1 * saf$times) / (1 + exp(coef(lg_loc)[1] + 0.1 * saf$times))

# table 
table_p3 <- saf %>%
  group_by(times) %>%
  summarise(mean_plogit = mean(plogit),
            mean_plogit3 = mean(plogit3))
table_p3
```

WHAT IF SLOPE CHANGED FROM .47 to 1?

```{r}
# Calculate predicted probabilities with a different slope
saf$plogit4 <- exp(coef(lg_loc)[1]+ 1 * saf$times) / (1 + exp(coef(lg_loc)[1] + 1 * saf$times))

# table 
table_p4 <- saf %>%
  group_by(times) %>%
  summarise(mean_plogit = mean(plogit),
            mean_plogit3 = mean(plogit3),
            mean_plogit4 = mean(plogit4))
table_p4

# Plot connected lines
ggplot(saf, aes(x = times)) +
  geom_point(aes(y = plogit), color = "blue", size = 1)+
  geom_line(aes(y = plogit), color = "blue", linetype = "solid", size = 0.5) +
  geom_point(aes(y = plogit3), color = "red", size = 1) +
  geom_line(aes(y = plogit3), color = "red", linetype = "solid", size = 0.5) + 
    geom_point(aes(y = plogit4), color = "green", size = 1) +
  geom_line(aes(y = plogit4), color = "green", linetype = "solid", size = 0.5) +
  labs(title = "Comparison of Predicted Probabilities with Different Intercepts",
       x = "times",
       y = "Predicted Probability") +
  scale_linetype_manual(values = c("solid", "solid", "solid"), labels = c("Original slope", "Slope: 0.1", "Slope: 1"))

```

##Probit

```{r}
# Fit a probit model
probit_loc <- glm(locdich ~ times, data = saf, family = binomial(link = "probit"))
summary(probit_loc)

# Predicted probabilities
saf$pprobit <- predict(probit_loc, type = "response")

# Linear predictors (z-scores)
saf$zscore <- predict(probit_loc, type = "link")

```

margins:

```{r}
preloc_pro<-avg_predictions(probit_loc, type="response", variables = list(times = seq(0, 4, by = 1)))
preloc_pro

# plot with no points
ggplot(as.data.frame(preloc_pro), aes(x = times, y = estimate)) +
  geom_point()+
  geom_line() +
  geom_errorbar(aes(ymin = estimate - 2*std.error, ymax = estimate + 2*std.error), alpha = 0.5) +
  labs(title = "Adjusted predictions",
       x = "Times", y = "Pr(locdich)")+
scale_x_continuous(breaks = seq(0,4,1)) +
  scale_y_continuous(breaks = seq(0.4,0.9,0.1))
```

In R, you can use the pnorm function to calculate the cumulative distribution function (CDF) of the standard normal distribution.

```{r}
# Simulate probability of 1 for a person at times=0
prob_times_0 <- pnorm(coef(probit_loc)[1] + coef(probit_loc)[2] * 0)

# Simulate probability of 1 for a person at times=4
prob_times_4 <- pnorm(coef(probit_loc)[1] + coef(probit_loc)[2] * 4)

# Display the results
cat("Simulated probability of 1 for a person at times=0:", prob_times_0, "\n")
cat("Simulated probability of 1 for a person at times=4:", prob_times_4, "\n")
```

In R, you can use the qnorm function to calculate the quantile function (inverse cumulative distribution function) of the standard normal distribution. This can be used to find the z-score for a given probability.

```{r}
# Given probability
probability = 0.483

# Calculate the z-score for the given probability
z_score1 = qnorm(probability)

# Display the result
cat("Z-score for a person with a probability of", probability, ":", z_score1, "\n")
```

change probability to 0.5

```{r}
# Given probability
probability = 0.5

# Calculate the z-score for the given probability
z_score2 = qnorm(probability)

# Display the result
cat("Z-score for a person with a probability of", probability, ":", z_score2, "\n")
```

PREDICTED PROBABILITY AND PREDICTED Z-SCORE FOR ALL LEVELS OF X

```{r}
table_z <- saf %>%
  group_by(times) %>%
  summarise(mean_pprobit = mean(pprobit),
            mean_zscore = mean(zscore))
table_z
```

COMPARES PREDICTED PROBABILITIES FOR PROBIT AND LOGIT MODEL

```{r}
table_pz <- saf %>%
  group_by(times) %>%
  summarise(mean_pprobit = mean(pprobit),
            mean_plogit = mean(plogit))
table_pz
```

##INTERPRETATION OF COEFFICIENTS ###ODDS AND FACTOR CHANGE IN ODDS INTERPRETATION IN LOGIT

```{r}
# Fit a logistic regression model
lg_loc_k <- glm(locdich ~ know, data = saf, family = binomial)
summary(lg_loc_k)
# SHOWS THE PROBABILITY FOR SOMEONE WITH NO KNOWLEDGE(=0)
preloc_lg_k <-avg_predictions(lg_loc_k, type="response", variables = list(know = 0))
preloc_lg_k
```

```{r}
# Calculate odds for a probability of 0.245
probability <- 0.245
odds <- probability / (1 - probability)

# Display the odds
cat("Odds for a probability of", probability, ":", odds, "\n")

# Display the logit for the calculated odds
logit <- log(odds)
cat("Logit for a probability of", probability, ":", logit, "\n")
```

SHOWS EFFECT OF X ON THE "ODDS" BY EXPONENTIATING THE B FROM THE LOGIT MODEL

```{r}
summary(lg_loc_k)$coefficients[, c("Estimate", "Std. Error", "Pr(>|z|)")]

# Calculate the odds ratio
odds_ratio <- exp(coef(lg_loc_k)['know'])
cat("Odds ratio for 'know':", odds_ratio, "\n")
```

GENERATION OF THE PREDICTED LOG-ODDS FOR ALL LEVELS OF KNOWLEDGE

```{r}
saf$logodds2 <- predict(lg_loc_k, type = "link")

# Calculate odds from the predicted log-odds
saf$odds2 <- exp(saf$logodds2)

table_log_odds <- saf %>%
  group_by(know) %>%
  summarise(mean_logodds2 = mean(logodds2),
            mean_odds2 = mean(odds2))
table_log_odds
```

In R, there isn't a direct equivalent to the or option in the Stata logit command for odds ratio interpretation in the summary function.

EXERCISE: VERIFY THE CONSTANT FACTOR CHANGE IN THE ODDS

```{r}
lg_loc_m <- glm(locdich ~ know + groups + educ1 + factor(male), data = saf, family = binomial(link = "logit"))

# Obtain summary statistics
summary_stats <- summary(lg_loc_m)

# Display model coefficients, standard errors, and odds ratios
print(summary_stats$coefficients[, c("Estimate", "Std. Error", "Pr(>|z|)")])

# Calculate odds ratios manually
exp(coefficients(lg_loc_m))
```

PROBABILITY AND MARGINAL CHANGE INTERPRETATIONS

```{r}
probit_loc_m <- glm(locdich ~ groups + know + student + male, data = saf, family = binomial(link = "probit"))

summary(probit_loc_m) 

#calculate robust standard errors
coeftest(probit_loc_m, vcovHC(probit_loc_m, type = "HC1"))

# standard errors are slightly different from those in STATA
```

MER: All VARIABLES AT THEIR MINIMUM

```{r}
avg_predictions(probit_loc_m, type = "response",variables = list(groups = 0, know = 0, student = 0, male = 0))
```

MER: ALL VARIABLES AT THEIR MAXIMUM

```{r}

avg_predictions(probit_loc_m, type = "response",variables = list(groups = 5, know = 8, student = 1, male = 1))

```

CHANGES IN PREDICTED PROBABILITIES: MARGINAL CHANGE

```{r}
# ALL OTHER VARIABLES AT THIER OBSERVED VALUES
summary(margins(probit_loc_m, all = TRUE))
# ALL OTHER VARIABLES SET AT THEIR MEANS
summary(margins(probit_loc_m, all = TRUE, at = list(groups = mean(saf$groups), know=mean(saf$know), student =mean(saf$student) , male = mean(saf$male))))
        
```

CHANGES IN PREDICTED PROBABILITIES: DISCRETE CHANGE

MER: RANGE OF GROUP MEMBERSHIPS

DISCRETE CHANGE FROM 0 to 5 GROUPS USING MARGINAL EFFECTS AT MEAN APPROACH:

```{r}
avg_predictions(probit_loc_m, type = "response",newdata="mean", variables = list(groups = c(0,5)))

```

DISCRETE CHANGE FROM 0 to 5 GROUPS, HOLDING OTHER VARS AT OBSERVED SAMPLE VALUES

```{r}
avg_predictions(probit_loc_m, type = "response",variables = list(groups = c(0,5)))

```

SET OTHER VARIABLES TO THEIR MINIMUM AND MAXIMUM

```{r}
#DIFFERENCE BETWEEN MIN AND MAX ON GROUPS, HOLDING OTHER VARS AT MINIMUM
avg_predictions(probit_loc_m, type = "response",variables = list(groups = c(0,5), know = 0, student = 0, male = 0))


# DIFFERENCE BETWEEN MIN AND MAX ON GROUPS, HOLDING OTHER VARS AT MAX
avg_predictions(probit_loc_m, type = "response",variables = list(groups = c(0,5), know = 8, student = 1, male = 1))

```

GENERATE FULL DISCRETE AND MARGINAL CHANGE INFORMATION

```{r}
# margins
summary(margins(probit_loc_m, all = TRUE))
# when x+1, what is the change of y (observed values)
avg_comparisons(probit_loc_m, type="response") 
# when x+sd, what is the change of y (observed values)
avg_comparisons(probit_loc_m, type="response", variables = list(groups = sd(saf$groups), know=sd(saf$know)))
```

FOR DISCRETE CHANGES CENTERED AROUND MEAN

```{r}
# define a funtion as center_diff for centering the change (change by 1)
center_diff <- \(x) data.frame(x - 0.5, x + 0.5)
avg_comparisons( 
  probit_loc_m,
  variables = list(groups = center_diff))

avg_comparisons( 
  probit_loc_m,
  variables = list(know = center_diff))

#alternative way to do this without defining a new function
avg_comparisons(
  probit_loc_m,
  variables = list(groups = data.frame(low=saf$groups-0.5, high=saf$groups+0.5)))
```

```{r}
# define a funtion as center_diff for centering the change (change by sd)
center_diff_sd <- \(x) data.frame(x - 0.5*sd(x), x + 0.5*sd(x))
avg_comparisons( 
  probit_loc_m, type="response",
  variables = list(groups = center_diff_sd))

avg_comparisons( 
  probit_loc_m,type="response",
  variables = list(know = center_diff_sd))
```

FOR DISCRETE CHANGES CENTERED AROUND MEAN, OTHER VARIABLES HELD AT MEAN

```{r}
# when x+1, what is the change of y (at mean)
avg_comparisons(probit_loc_m, type="response", newdata="mean",variables = list(groups = center_diff)) 

avg_comparisons(probit_loc_m, type="response", newdata="mean",variables = list(know = center_diff)) 

# when x+sd, what is the change of y (at mean)
avg_comparisons(
  probit_loc_m,newdata="mean", type="response", variables=list(groups="sd", know="sd"))

#marginal effect 
avg_slopes(probit_loc_m,newdata="mean")

```

You can also calculate these marginal effects mannually

```{r}
# margins
summary(margins(probit_loc_m, all = TRUE))

# take groups as an example
# when x+1, what is the change of y (observed values)

# Predicted probabilities at the original value of x
saf$predicted_probs_original <- predict(probit_loc_m, type = "response")

# Predicted probabilities at x + 1
saf$predicted_probs_x_plus_1 <- predict(probit_loc_m, newdata = transform(saf, groups = saf$groups+1) , type = "response")

#Predicted probabilities at x + SD(x)
saf$predicted_probs_x_plus_sd <- predict(probit_loc_m, newdata = transform(saf, groups = saf$groups + sd(saf$groups)), type = "response")


# Change when x increases by one unit
mean(saf$predicted_probs_x_plus_1 - saf$predicted_probs_original)

# Change when x increases by one SD
mean(saf$predicted_probs_x_plus_sd - saf$predicted_probs_original)
```

FOR DISCRETE CHANGES CENTERED AROUND MEAN

```{r}
# when x+1, what is the change of y (observed values)

# Predicted probabilities at the original value of x
saf$predicted_probs_plus_1_l <- predict(probit_loc_m, newdata = transform(saf, groups = saf$groups-0.5), type = "response")

# Predicted probabilities at x + 1
saf$predicted_probs_x_plus_1_h <- predict(probit_loc_m, newdata = transform(saf, groups = saf$groups+0.5) , type = "response")

#Predicted probabilities at x + SD(x)
saf$predicted_probs_x_plus_sd_l <- predict(probit_loc_m, newdata = transform(saf, groups = saf$groups - sd(saf$groups)/2), type = "response")

saf$predicted_probs_x_plus_sd_h <- predict(probit_loc_m, newdata = transform(saf, groups = saf$groups + sd(saf$groups)/2), type = "response")

# Change when x increases by one unit
mean(saf$predicted_probs_x_plus_1_h - saf$predicted_probs_plus_1_l)

# Change when x increases by one SD
mean(saf$predicted_probs_x_plus_sd_h - saf$predicted_probs_x_plus_sd_l)
```

FOR DISCRETE CHANGES CENTERED AROUND MEAN, OTHER VARIABLES HELD AT MEAN

```{r}
# when x+1, what is the change of y (observed values)

# Predicted probabilities at the original value of x
saf$predicted_probs_plus_1_l_m <- predict(probit_loc_m, newdata = transform(saf, groups = mean(saf$groups)-0.5, know=mean(saf$know), student=mean(saf$student), male=mean(saf$male)), type = "response")

# Predicted probabilities at x + 1
saf$predicted_probs_x_plus_1_h_m <- predict(probit_loc_m, newdata = transform(saf, groups = mean(saf$groups)+0.5, know=mean(saf$know), student=mean(saf$student), male=mean(saf$male)) , type = "response")

#Predicted probabilities at x + SD(x)
saf$predicted_probs_x_plus_sd_l_m <- predict(probit_loc_m, newdata = transform(saf, groups = mean(saf$groups) - sd(saf$groups)/2, know=mean(saf$know), student=mean(saf$student), male=mean(saf$male)), type = "response")

saf$predicted_probs_x_plus_sd_h_m <- predict(probit_loc_m, newdata = transform(saf, groups = mean(saf$groups) + sd(saf$groups)/2, know=mean(saf$know), student=mean(saf$student), male=mean(saf$male)), type = "response")

# Change when x increases by one unit
mean(saf$predicted_probs_x_plus_1_h_m - saf$predicted_probs_plus_1_l_m)

# Change when x increases by one SD
mean(saf$predicted_probs_x_plus_sd_h_m - saf$predicted_probs_x_plus_sd_l_m)
```

GIVE PREDICTED PROBABILITIES FOR DIFFERENT CATEGORIES OF DUMMY VARIABLES, REST AT MEAN OR OBSERVED VALUES

```{r}
summary(probit_loc_m)
avg_predictions(probit_loc_m, type = "response",variables = list(male = c(0,1)))

avg_predictions(probit_loc_m, type = "response",variables = list(student = c(0,1)))

predictions(probit_loc_m, type = "response",newdata = "mean", variables = list(male = c(0,1)))
predictions(probit_loc_m, type = "response",newdata = "mean", variables = list(student = c(0,1)))

avg_predictions(probit_loc_m, type = "response",variables = list(male = c(0,1), student=c(0, 1)))

```

### LATENT VARIABLE Y\* INTERPRETATION

```{r}
probit_loc_m2 <- glm(locdich ~ educ1 + know + groups+ student + male, data = saf, family = binomial(link = "probit"))

summary_m2<-summary(probit_loc_m2) 

# Display model coefficients, standard errors, and odds ratios
print(summary_m2$coefficients[, c("Estimate", "Std. Error","z value", "Pr(>|z|)")])

```

Do the manual calculation as do file suggests.