Housekeeping

# Run this script in a fresh R session rather than calling rm(list = ls()):
# clearing the global environment from inside a script destroys the caller's
# objects while still leaving attached packages and options in place.
# plyr is attached BEFORE dplyr so that plyr does not mask dplyr verbs such as
# arrange(), count(), mutate(), rename() and summarise().
library(plyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(plyr)

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

library(ggplot2)
library(plm)

## 
## Attaching package: 'plm'

## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead

# Import the World Happiness Report data; the column names are replaced with
# short identifiers right after the import
library(readr)
index <- read_csv(
  file = "~/Desktop/R Working Directory/world-happiness-report.csv"
)

## Rows: 2098 Columns: 11

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Country name
## dbl (10): year, Life Ladder, Log GDP per capita, Social support, Healthy lif...

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Replace the original headers with short names (same column order as the file)
names(index) <- c(
  "name", "year", "happiness", "loggdp", "social", "healthylife",
  "freedom", "generosity", "corruption", "positive", "negative"
)
# Number of rows in the imported data
nrow(index)

## [1] 2098

# Count the missing values in every column.
# A purrr-style lambda replaces funs(), which was deprecated in dplyr 0.8.0;
# the result is still a one-row tibble that keeps the original column names.
index %>%
  summarise_all(~ sum(is.na(.)))

## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))

## # A tibble: 1 x 11
##    name  year happiness loggdp social healthylife freedom generosity corruption
##   <int> <int>     <int>  <int>  <int>       <int>   <int>      <int>      <int>
## 1     0     0         0     36     13          55      32         89        110
## # … with 2 more variables: positive <int>, negative <int>

# The positive-affect and negative-affect columns have a high number of missing
# values: 171 (8.15%) and 165 (7.86%), respectively. Both columns are therefore dropped.
drops <- c("positive", "negative")
index <- index[, !names(index) %in% drops]

EDA

# Correlation matrix of the six factors that influence happiness
# Exclude the country name, the year and the dependent variable (happiness)
drops1 <- c("name", "year", "happiness")
# Keep complete rows only, so cor() is computed on rows without missing values
index_cor <- na.omit(index[, !names(index) %in% drops1])
corr <- cor(index_cor)
library(corrplot)

## corrplot 0.90 loaded

# Show the lower triangle of the matrix, drawn as squares
corrplot(corr, type = "lower", method = "square")

# Descriptive analysis: inspect the shape of the data and look for patterns
# before modelling. The relationship of interest is happiness vs. log GDP per
# capita, so it is plotted with a fitted linear trend.
# happiness ~ loggdp
# NOTE(review): `x` is not used by the plot that follows; confirm whether it is needed.
x <- aggregate(happiness ~ loggdp, data = index, FUN = mean)
ggplot(index, aes(x = loggdp, y = happiness)) +
  geom_point(size = 2, shape = 18, col = "lightblue") +
  xlab("Log GDP per Capita") +
  ylab("Happiness") +
  geom_smooth(method = lm, col = "red")

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 36 rows containing non-finite values (stat_smooth).

## Warning: Removed 36 rows containing missing values (geom_point).

# Comparison of Life Ladder (happiness level) of each country in 2021
happiness21 <- read.csv("~/Desktop/R Working Directory/world-happiness-report-2021.csv")
View(happiness21)
# Give the country and score columns short names
names(happiness21)[names(happiness21) == "Country.name"] <- "country"
names(happiness21)[names(happiness21) == "Ladder.score"] <- "happiness"
# library() stops with an error when data.table is not installed; require()
# would only warn and return FALSE, postponing the failure to the first
# data.table() call.
library(data.table)

## Loading required package: data.table

## 
## Attaching package: 'data.table'

## The following object is masked from 'package:plm':
## 
##     between

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

# Keying the table on happiness sorts it in ascending order of the score, so
# head() returns the least happy countries and tail() the happiest ones
order <- data.table(happiness21, key = "happiness")
bottom10 <- head(order, n = 10)
ggplot(bottom10, aes(x = reorder(country, happiness), y = happiness)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  ylab("Happiness") +
  xlab("Country") +
  ggtitle("10 Countries with the lowest level of Happiness in 2021")

top10 <- tail(order, n = 10)
ggplot(top10, aes(x = reorder(country, happiness), y = happiness)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  ylab("Happiness") +
  xlab("Country") +
  ggtitle("10 Countries with the highest level of Happiness in 2021")

# LINEAR REGRESSION MODEL
# Simple OLS of happiness on log GDP per capita
reg <- lm(formula = happiness ~ loggdp, data = index)
summary(reg)

## 
## Call:
## lm(formula = happiness ~ loggdp, data = index)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3261 -0.4734 -0.0128  0.5203  1.9339 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.66879    0.12308  -13.56   <2e-16 ***
## loggdp       0.76252    0.01303   58.51   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6829 on 2060 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.6243, Adjusted R-squared:  0.6241 
## F-statistic:  3423 on 1 and 2060 DF,  p-value: < 2.2e-16

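# Because GDP enters in logs (a level-log model), the coefficient is divided by 100 to read it per 1%: a 1% increase in GDP per capita is associated with an average increase of about 0.0076 points (0.76/100) in the happiness index, ceteris paribus. The relationship between happiness and GDP is also statistically significant.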
# There seems to be a positive relationship between happiness and GDP. However, our model may suffer from OVB because there are many factors that affect happiness such as employment, equality, freedom, etc. that have not been taken into consideration. The correlation might actually reflect some other “unobserved” factor that is not included in the analysis.
# Multiple linear regression
# Add freedom, generosity and corruption as further regressors
reg1 <- lm(
  formula = happiness ~ loggdp + freedom + generosity + corruption,
  data = index
)
summary(reg1)

## 
## Call:
## lm(formula = happiness ~ loggdp + freedom + generosity + corruption, 
##     data = index)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2464 -0.3765  0.0512  0.3922  1.8493 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.81031    0.17589 -10.292  < 2e-16 ***
## loggdp       0.67412    0.01358  49.630  < 2e-16 ***
## freedom      1.68150    0.12037  13.970  < 2e-16 ***
## generosity   0.72881    0.09468   7.698 2.21e-14 ***
## corruption  -0.37260    0.08980  -4.149 3.48e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6121 on 1898 degrees of freedom
##   (195 observations deleted due to missingness)
## Multiple R-squared:  0.7041, Adjusted R-squared:  0.7035 
## F-statistic:  1129 on 4 and 1898 DF,  p-value: < 2.2e-16

# We can see that the relationship between GDP and happiness is still positive and statistically significant. However, the estimated effect of GDP on happiness is smaller: a 1% increase in GDP per capita is associated with an average increase of about 0.0067 points (0.67/100) in the happiness index, ceteris paribus.

# PANEL REGRESSION (with unbalanced panel)
# Give every country a numeric ID: the integer code of its country-name factor level
index <- transform(index, id = as.numeric(factor(name)))
View(index)
# Declare the panel structure:
# "id" identifies the entity and "year" identifies the time period
index.p <- pdata.frame(index, index = c("id", "year"))
# Report the dimensions of the panel dataframe
pdim(index.p)

## Unbalanced Panel: n = 166, T = 1-16, N = 2098

# We have 166 countries (entities), each observed in at most 16 of the 17 years (2005-2021), for a total of 2,098 observations. Note that the panel is unbalanced (T = 1-16): no country is observed in all 17 periods, and some are observed only once.
# Let's consider the panel nature of our dataset.  Perhaps happiness is different (on average) for each country (e.g. varying with perception and evaluation).  Or perhaps happiness is different through time (e.g. government policies).  These variables are in effect omitted, and if relevant, then our causal estimate may be biased (OVB). So let's run an LSDV regression and a panel regression for happiness on loggdp while controlling for countries and time effects
# Entity (country) dummy variables
entity.f <- factor(index[["id"]])
# Time (year) dummy variables
time.f <- factor(index[["year"]])
# LSDV regression: OLS of happiness on loggdp plus the country and year dummies
index_lm <- lm(formula = happiness ~ loggdp + entity.f + time.f, data = index)
summary(index_lm)

## 
## Call:
## lm(formula = happiness ~ loggdp + entity.f + time.f, data = index)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54723 -0.20819  0.00291  0.21681  1.61148 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.537560   0.725339  -7.634 3.58e-14 ***
## loggdp       1.229193   0.094612  12.992  < 2e-16 ***
## entity.f2   -0.630756   0.219411  -2.875 0.004089 ** 
## entity.f3   -0.229360   0.226223  -1.014 0.310775    
## entity.f4   -0.719968   0.251342  -2.864 0.004223 ** 
## entity.f5   -0.155865   0.266532  -0.585 0.558761    
## entity.f6   -0.959300   0.211802  -4.529 6.29e-06 ***
## entity.f7   -0.077479   0.327051  -0.237 0.812758    
## entity.f8   -0.236466   0.338779  -0.698 0.485267    
## entity.f9   -0.860463   0.228930  -3.759 0.000176 ***
## entity.f10  -1.230571   0.326976  -3.763 0.000173 ***
## entity.f11   0.640541   0.148931   4.301 1.79e-05 ***
## entity.f12  -0.547381   0.247338  -2.213 0.027011 *  
## entity.f13  -0.432988   0.330635  -1.310 0.190503    
## entity.f14   1.151296   0.312576   3.683 0.000237 ***
## entity.f15   0.207854   0.151344   1.373 0.169795    
## entity.f16  -0.106507   0.281169  -0.379 0.704880    
## entity.f17   0.669827   0.185092   3.619 0.000304 ***
## entity.f18  -0.480988   0.222840  -2.158 0.031019 *  
## entity.f19  -2.043380   0.243018  -8.408  < 2e-16 ***
## entity.f20   0.686026   0.231472   2.964 0.003077 ** 
## entity.f21  -1.628518   0.257286  -6.330 3.06e-10 ***
## entity.f22   0.848426   0.145618   5.826 6.65e-09 ***
## entity.f23   1.198892   0.206106   5.817 7.03e-09 ***
## entity.f24   0.223618   0.147327   1.518 0.129223    
## entity.f25   0.576066   0.147678   3.901 9.92e-05 ***
## entity.f26   0.008735   0.326027   0.027 0.978628    
## entity.f27   0.829448   0.208404   3.980 7.15e-05 ***
## entity.f28   0.838176   0.144382   5.805 7.52e-09 ***
## entity.f29  -0.067014   0.264572  -0.253 0.800071    
## entity.f30  -0.485502   0.211466  -2.296 0.021792 *  
## entity.f31   0.473160   0.223719   2.115 0.034563 *  
## entity.f32   0.021452   0.179890   0.119 0.905090    
## entity.f33   0.423452   0.161444   2.623 0.008789 ** 
## entity.f34   1.799042   0.184111   9.771  < 2e-16 ***
## entity.f35   1.074538   0.246176   4.365 1.34e-05 ***
## entity.f36  -0.928299   0.276302  -3.360 0.000796 ***
## entity.f38  -0.922839   0.309135  -2.985 0.002870 ** 
## entity.f39  -0.380111   0.305504  -1.244 0.213578    
## entity.f40   0.163165   0.337381   0.484 0.628709    
## entity.f41   0.874126   0.218808   3.995 6.72e-05 ***
## entity.f42  -0.651047   0.231404  -2.813 0.004952 ** 
## entity.f43   0.191986   0.211374   0.908 0.363847    
## entity.f44  -0.999644   0.207761  -4.812 1.62e-06 ***
## entity.f45   0.813602   0.189281   4.298 1.81e-05 ***
## entity.f46  -1.165843   0.293661  -3.970 7.46e-05 ***
## entity.f47   1.043365   0.159403   6.545 7.62e-11 ***
## entity.f48   0.301890   0.327160   0.923 0.356250    
## entity.f49  -0.583796   0.320163  -1.823 0.068396 .  
## entity.f50  -1.424668   0.242486  -5.875 4.98e-09 ***
## entity.f51   1.293966   0.216096   5.988 2.54e-09 ***
## entity.f52  -1.294714   0.215116  -6.019 2.11e-09 ***
## entity.f53  -0.567634   0.331573  -1.712 0.087072 .  
## entity.f54   0.420574   0.157407   2.672 0.007608 ** 
## entity.f55  -1.269252   0.291187  -4.359 1.38e-05 ***
## entity.f56   1.138033   0.190378   5.978 2.70e-09 ***
## entity.f57   0.650192   0.158696   4.097 4.36e-05 ***
## entity.f58   0.989961   0.410765   2.410 0.016046 *  
## entity.f59   0.677348   0.152090   4.454 8.94e-06 ***
## entity.f60   0.801876   0.167405   4.790 1.80e-06 ***
## entity.f61  -2.051017   0.345863  -5.930 3.59e-09 ***
## entity.f62  -1.337156   0.281782  -4.745 2.24e-06 ***
## entity.f63   0.020920   0.342990   0.061 0.951370    
## entity.f64  -0.137113   0.163050  -0.841 0.400498    
## entity.f65  -0.109468   0.201832  -0.542 0.587627    
## entity.f66  -0.911798   0.234843  -3.883 0.000107 ***
## entity.f67  -0.741666   0.209087  -3.547 0.000399 ***
## entity.f68  -0.661898   0.355255  -1.863 0.062595 .  
## entity.f69   0.183602   0.305944   0.600 0.548501    
## entity.f70  -0.924991   0.317182  -2.916 0.003584 ** 
## entity.f71   0.222427   0.173744   1.280 0.200632    
## entity.f72   0.504846   0.228191   2.212 0.027060 *  
## entity.f73  -1.118327   0.310804  -3.598 0.000329 ***
## entity.f74  -0.335187   0.206941  -1.620 0.105459    
## entity.f75  -0.619651   0.266873  -2.322 0.020345 *  
## entity.f76   0.211947   0.150847   1.405 0.160171    
## entity.f77   0.330478   0.202822   1.629 0.103396    
## entity.f78  -1.311886   0.348681  -3.762 0.000173 ***
## entity.f79   0.740806   0.159152   4.655 3.47e-06 ***
## entity.f80   0.209358   0.186636   1.122 0.262113    
## entity.f81  -1.142600   0.278606  -4.101 4.29e-05 ***
## entity.f82  -1.110336   0.243059  -4.568 5.24e-06 ***
## entity.f83   0.101173   0.199657   0.507 0.612402    
## entity.f84   1.039683   0.162504   6.398 1.98e-10 ***
## entity.f85  -0.267375   0.247819  -1.079 0.280766    
## entity.f86  -0.909080   0.287709  -3.160 0.001604 ** 
## entity.f87  -1.298137   0.402813  -3.223 0.001292 ** 
## entity.f88   0.857190   0.153438   5.587 2.65e-08 ***
## entity.f89   1.450129   0.164289   8.827  < 2e-16 ***
## entity.f90  -0.684627   0.271433  -2.522 0.011742 *  
## entity.f91  -0.978460   0.348356  -2.809 0.005024 ** 
## entity.f92   0.762268   0.145265   5.247 1.72e-07 ***
## entity.f93  -0.590492   0.309176  -1.910 0.056299 .  
## entity.f94  -0.179641   0.166951  -1.076 0.282062    
## entity.f95  -0.431103   0.271711  -1.587 0.112765    
## entity.f96   0.447246   0.250836   1.783 0.074744 .  
## entity.f97   0.188055   0.204858   0.918 0.358749    
## entity.f98  -0.326357   0.206779  -1.578 0.114667    
## entity.f99  -0.844832   0.250726  -3.370 0.000768 ***
## entity.f100  0.031807   0.190458   0.167 0.867386    
## entity.f101  1.944889   0.173837  11.188  < 2e-16 ***
## entity.f102  0.003813   0.172475   0.022 0.982362    
## entity.f103 -0.774061   0.225279  -3.436 0.000603 ***
## entity.f104  0.927541   0.144867   6.403 1.92e-10 ***
## entity.f105 -0.048493   0.338286  -0.143 0.886031    
## entity.f106  0.189613   0.312962   0.606 0.544678    
## entity.f107  1.034186   0.166996   6.193 7.23e-10 ***
## entity.f108  1.601928   0.155216  10.321  < 2e-16 ***
## entity.f109  0.429903   0.166730   2.578 0.010000 ** 
## entity.f110 -1.566169   0.475626  -3.293 0.001010 ** 
## entity.f111 -0.964218   0.233296  -4.133 3.74e-05 ***
## entity.f112 -0.156304   0.354709  -0.441 0.659515    
## entity.f113 -0.089349   0.472365  -0.189 0.849993    
## entity.f114  0.836529   0.157621   5.307 1.24e-07 ***
## entity.f115  0.175590   0.166017   1.058 0.290345    
## entity.f116  0.085660   0.276233   0.310 0.756517    
## entity.f117  0.027094   0.214733   0.126 0.899605    
## entity.f118  0.064867   0.213034   0.304 0.760788    
## entity.f119  0.281970   0.180791   1.560 0.119013    
## entity.f120 -0.721380   0.280251  -2.574 0.010127 *  
## entity.f121 -1.371201   0.295497  -4.640 3.72e-06 ***
## entity.f122 -1.719639   0.419556  -4.099 4.33e-05 ***
## entity.f123 -0.916346   0.270126  -3.392 0.000707 ***
## entity.f124 -1.055499   0.275173  -3.836 0.000129 ***
## entity.f125  0.357597   0.148891   2.402 0.016414 *  
## entity.f126 -0.807304   0.325483  -2.480 0.013213 *  
## entity.f127  0.568058   0.146644   3.874 0.000111 ***
## entity.f128 -0.689022   0.239441  -2.878 0.004052 ** 
## entity.f129  0.941923   0.150342   6.265 4.60e-10 ***
## entity.f130 -1.556686   0.379624  -4.101 4.30e-05 ***
## entity.f131 -0.625621   0.287497  -2.176 0.029672 *  
## entity.f132 -0.870428   0.302344  -2.879 0.004035 ** 
## entity.f135 -0.828548   0.220514  -3.757 0.000177 ***
## entity.f136 -1.171265   0.306858  -3.817 0.000139 ***
## entity.f138 -0.588816   0.308572  -1.908 0.056518 .  
## entity.f139 -1.172329   0.210920  -5.558 3.12e-08 ***
## entity.f140  0.151443   0.206682   0.733 0.463813    
## entity.f141  0.129059   0.443167   0.291 0.770915    
## entity.f142 -0.776474   0.249580  -3.111 0.001892 ** 
## entity.f143 -0.070646   0.332231  -0.213 0.831630    
## entity.f144 -0.183626   0.359650  -0.511 0.609714    
## entity.f145 -0.683022   0.200450  -3.407 0.000670 ***
## entity.f146 -1.039554   0.333759  -3.115 0.001869 ** 
## entity.f147  1.180985   0.142594   8.282 2.27e-16 ***
## entity.f148  0.099933   0.141031   0.709 0.478668    
## entity.f149  0.095146   0.237445   0.401 0.688679    
## entity.f150  0.608364   0.163376   3.724 0.000202 ***
## entity.f151 -0.432511   0.320492  -1.350 0.177332    
## entity.f152 -0.722304   0.210199  -3.436 0.000603 ***
## entity.f153 -1.295225   0.269515  -4.806 1.66e-06 ***
## entity.f154 -0.120259   0.226583  -0.531 0.595654    
## entity.f155  0.892760   0.140964   6.333 2.99e-10 ***
## entity.f156 -0.907350   0.219025  -4.143 3.58e-05 ***
## entity.f157 -0.915027   0.353723  -2.587 0.009760 ** 
## entity.f158 -0.349552   0.320885  -1.089 0.276144    
## entity.f159 -0.504979   0.344465  -1.466 0.142820    
## entity.f160  0.054895   0.252019   0.218 0.827591    
## entity.f161  1.140632   0.171428   6.654 3.73e-11 ***
## entity.f162  0.447522   0.228024   1.963 0.049839 *  
## entity.f163  0.536459   0.173723   3.088 0.002044 ** 
## entity.f164 -0.068763   0.159639  -0.431 0.666705    
## entity.f165  0.487366   0.148659   3.278 0.001063 ** 
## entity.f166  0.065589   0.142367   0.461 0.645063    
## time.f2006  -0.359585   0.086053  -4.179 3.07e-05 ***
## time.f2007  -0.243109   0.083569  -2.909 0.003667 ** 
## time.f2008  -0.277629   0.082970  -3.346 0.000836 ***
## time.f2009  -0.209794   0.082786  -2.534 0.011352 *  
## time.f2010  -0.289557   0.082091  -3.527 0.000430 ***
## time.f2011  -0.281569   0.081249  -3.466 0.000541 ***
## time.f2012  -0.364900   0.081555  -4.474 8.12e-06 ***
## time.f2013  -0.417924   0.081845  -5.106 3.62e-07 ***
## time.f2014  -0.426264   0.081891  -5.205 2.15e-07 ***
## time.f2015  -0.447474   0.082064  -5.453 5.61e-08 ***
## time.f2016  -0.450191   0.082344  -5.467 5.18e-08 ***
## time.f2017  -0.402064   0.082495  -4.874 1.19e-06 ***
## time.f2018  -0.367842   0.083691  -4.395 1.17e-05 ***
## time.f2019  -0.368115   0.083977  -4.384 1.23e-05 ***
## time.f2020  -0.305297   0.086294  -3.538 0.000413 ***
## time.f2021  -0.360213   0.082891  -4.346 1.46e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.377 on 1883 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.8953, Adjusted R-squared:  0.8854 
## F-statistic: 90.48 on 178 and 1883 DF,  p-value: < 2.2e-16

# Estimate the same specification with plm: within model with two-way
# (entity and time) effects
index_plm <- plm(
  formula = happiness ~ loggdp,
  data = index.p,
  effect = "twoways",
  model = "within"
)
summary(index_plm)

## Twoways effects Within Model
## 
## Call:
## plm(formula = happiness ~ loggdp, data = index.p, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 162, T = 1-16, N = 2062
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -1.5472328 -0.2081878  0.0029075  0.2168072  1.6114843 
## 
## Coefficients:
##        Estimate Std. Error t-value  Pr(>|t|)    
## loggdp 1.229193   0.094612  12.992 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    291.68
## Residual Sum of Squares: 267.68
## R-Squared:      0.082264
## Adj. R-Squared: -0.0044892
## F-statistic: 168.789 on 1 and 1883 DF, p-value: < 2.22e-16

# We can see that both regressions give a coefficient on loggdp of 1.229, which is statistically significant. This implies that the estimated effect remains positive, and that the pooled OLS specification appears to have understated (biased downward) the effect of loggdp on happiness by omitting entity and time effects.
# The estimates are identical, confirming that the fixed effects model can be estimated either via LSDV or via the two-way within estimator in plm. Again, plm is preferred for its compactness.

# PANEL REGRESSION (with balanced panel)
# Drop the years 2005-2013; the remaining rows form the unbalanced sub-panel
index_unbalanced <- index[!index$year %in% 2005:2013, ]
# Number of rows (years) each country contributes to the restricted sample
countyear <- ddply(index_unbalanced, .(name), nrow)
# A country enters the balanced panel only if it appears in every retained year.
# The required count is derived from the data (8 for 2014-2021) instead of
# being hard-coded, so the filter stays correct if the year window changes.
n_years <- length(unique(index_unbalanced$year))
eightyear <- countyear$name[countyear[, 2] == n_years]
index_balanced <- index_unbalanced[index_unbalanced$name %in% eightyear, ]
View(index_balanced)
# NOTE(review): balance is judged on row counts only. Rows with a missing
# loggdp are still dropped by plm(), so a model fitted on the "balanced" panel
# can be reported as unbalanced - confirm whether incomplete rows should be
# removed before the countries are selected.
# Set the panel dataframe
# "id" is the entity ID
# "year" is the time ID
index.p1 <- pdata.frame(index_balanced, index = c("id", "year")) # Balanced panel data
index.p2 <- pdata.frame(index_unbalanced, index = c("id", "year")) # Unbalanced panel data
# Find the dimensions of our panel dataframe
pdim(index.p1)

## Balanced Panel: n = 85, T = 8, N = 680

pdim(index.p2)

## Unbalanced Panel: n = 160, T = 1-8, N = 1107

# For the balanced panel, we have 85 countries (entities), each observed for 8 periods, for a total of 680 observations.
# For the unbalanced panel, we have 160 countries (entities) over 8 periods for a total of 1107 observations.
# Two-way (entity and time) within model estimated on the balanced panel
index_plm1 <- plm(
  formula = happiness ~ loggdp,
  data = index.p1,
  effect = "twoways",
  model = "within"
)
summary(index_plm1)

## Twoways effects Within Model
## 
## Call:
## plm(formula = happiness ~ loggdp, data = index.p1, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 85, T = 5-8, N = 668
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -1.1071651 -0.1354522 -0.0048293  0.1401133  1.2881520 
## 
## Coefficients:
##        Estimate Std. Error t-value  Pr(>|t|)    
## loggdp  2.28597    0.25029  9.1335 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    52.49
## Residual Sum of Squares: 45.84
## R-Squared:      0.1267
## Adj. R-Squared: -0.013031
## F-statistic: 83.4202 on 1 and 575 DF, p-value: < 2.22e-16

# Same two-way within specification, estimated on the unbalanced panel
index_plm2 <- plm(
  formula = happiness ~ loggdp,
  data = index.p2,
  effect = "twoways",
  model = "within"
)
summary(index_plm2)

## Twoways effects Within Model
## 
## Call:
## plm(formula = happiness ~ loggdp, data = index.p2, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 158, T = 1-8, N = 1078
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -1.4546617 -0.1472344 -0.0048135  0.1662633  1.2724146 
## 
## Coefficients:
##        Estimate Std. Error t-value  Pr(>|t|)    
## loggdp  1.42432    0.20482  6.9542 6.759e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    96.711
## Residual Sum of Squares: 91.841
## R-Squared:      0.050357
## Adj. R-Squared: -0.12145
## F-statistic: 48.3604 on 1 and 912 DF, p-value: 6.759e-12

# Conduct t-test for randomization: compare mean happiness in the balanced
# panel with mean happiness in the unbalanced panel.
# All statistics below treat missing happiness values the same way
# (na.rm = TRUE and non-missing counts), matching what t.test() does; the
# standard deviations previously omitted na.rm and would have returned NA
# where the means did not.
# NOTE(review): index_balanced is a subset of index_unbalanced, so the two
# samples overlap and are not independent, which the Welch test assumes.
# Comparing the balanced countries with the excluded ones may be what is
# intended - confirm.
X_balance <- mean(index_balanced$happiness, na.rm = TRUE)
X_balance #5.785

## [1] 5.784975

X_unbalanced <- mean(index_unbalanced$happiness, na.rm = TRUE)
X_unbalanced #5.498

## [1] 5.497712

s_balance <- sd(index_balanced$happiness, na.rm = TRUE)
s_balance #1.044

## [1] 1.044096

s_unbalanced <- sd(index_unbalanced$happiness, na.rm = TRUE)
s_unbalanced #1.110

## [1] 1.10983

nrow(index_balanced) #680

## [1] 680

nrow(index_unbalanced) #1107

## [1] 1107

# Squared standard errors of the two means (s^2 / n), where n is the number of
# non-missing happiness values actually used by mean() and sd()
se_balance <- s_balance^2 / sum(!is.na(index_balanced$happiness))
se_unbalanced <- s_unbalanced^2 / sum(!is.na(index_unbalanced$happiness))
# Welch t statistic: difference in means over the root of the summed squared
# standard errors
tstat <- (X_balance - X_unbalanced) / sqrt(se_balance + se_unbalanced)
tstat #t-test for randomization (long way)

## [1] 5.512267

t.test(index_balanced$happiness, index_unbalanced$happiness, alternative = "two.sided") #t-test for randomization (short way)

## 
##  Welch Two Sample t-test
## 
## data:  index_balanced$happiness and index_unbalanced$happiness
## t = 5.5123, df = 1503.9, p-value = 4.163e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1850404 0.3894859
## sample estimates:
## mean of x mean of y 
##  5.784975  5.497712

R Notebook

EDA