* Setup: make the Week 5 lecture folder the working directory so the
* relative ./data path below resolves.
cd "/Users/Brian/Dropbox/Grad School/Sixth Year/Econ:Poli 5/Lectures/Week 5"

* Load Data
* -clear- is the documented option of -use- for discarding whatever dataset
* is currently in memory before loading the new one.
use ./data/movie_ratings_rev.dta, clear

* List variable names, storage types, formats and labels
describe

/Users/Brian/Dropbox/Grad School/Sixth Year/Econ:Poli 5/Lectures/Week 5


Contains data from ./data/movie_ratings_rev.dta
  obs:           127                          
 vars:             8                          22 Dec 2020 18:42
--------------------------------------------------------------------------------
              storage   display    value
variable name   type    format     label      variable label
--------------------------------------------------------------------------------
film            str70   %70s                  Title of Movie
rottentomatoes  byte    %8.0g                 Rottent Tomatoes Critic Score
metacritic      byte    %8.0g                 Metacritic Score
genre           str22   %22s                  
box_office      double  %10.0g                Box Office Revenue in Millions
tickets         double  %10.0g                Tickets Sold in Millions
subsample       float   %9.0g                 Subsample For Illustrative
                                                Purposes
n               float   %9.0g                 
--------------------------------------------------------------------------------
Sorted by: rottentomatoes


* Scatter plot: Metacritic score (y-axis) against Rotten Tomatoes score
* (x-axis), hollow circle markers, white graph region.
twoway scatter metacritic rottentomatoes, ///
    graphregion(color(white) fcolor(white)) ///
    msymbol(circle_hollow) ///
    ytitle("Metacritic Score") ///
    xtitle("Rotten Tomatoes Score") ///
    title("Relation Between Metacritic and Rotten Tomatoes Scores")


* OLS regression of Metacritic score on Rotten Tomatoes score
regress metacritic rottentomatoes

      Source |       SS           df       MS      Number of obs   =       127
-------------+----------------------------------   F(1, 125)       =   1414.98
       Model |  41425.9232         1  41425.9232   Prob > F        =    0.0000
    Residual |  3659.58865       125  29.2767092   R-squared       =    0.9188
-------------+----------------------------------   Adj R-squared   =    0.9182
       Total |  45085.5118       126  357.821522   Root MSE        =    5.4108

-------------------------------------------------------------------------------
   metacritic |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
--------------+----------------------------------------------------------------
rottentomat~s |   .5996453   .0159411    37.62   0.000     .5680958    .6311948
        _cons |   21.78114   1.039368    20.96   0.000      19.7241    23.83818
-------------------------------------------------------------------------------


* Build an evenly spaced grid named points: 101 values running from 0 to 100.
* Observations beyond the 101st are left missing.
range points 0 100 101

* Line of best fit evaluated on that grid, using the intercept and slope
* stored by the most recent regression.
generate best_fit = _b[_cons] + points*_b[rottentomatoes]

(26 missing values generated)

(26 missing values generated)


* Print the estimated slope coefficient on rottentomatoes
display _b[rottentomatoes]

.59964528


* Predicted values: linear prediction (xb) from the most recent regression.
* The option is spelled out rather than relying on predict's default.
predict yhat, xb

* Residuals: observed Metacritic score minus its fitted value
gen residuals = metacritic - yhat

(option xb assumed; fitted values)


%head film metacritic yhat residuals


* Scatter plot: box office revenue (y-axis) against Rotten Tomatoes score
* (x-axis), hollow circle markers, white graph region.
twoway scatter box_office rottentomatoes, ///
    graphregion(color(white) fcolor(white)) ///
    msymbol(circle_hollow) ///
    ytitle("Box Office Revenue (in Millions)") ///
    xtitle("Rotten Tomatoes Score") ///
    title("Relation Between Box Office Receipts and Rotten Tomatoes")


* Same scatter as before, now overlaid with the fitted regression line (lfit)
twoway ///
    (scatter box_office rottentomatoes) ///
    (lfit box_office rottentomatoes), ///
    graphregion(color(white) fcolor(white)) ///
    ytitle("Box Office Revenue (in Millions)") ///
    xtitle("Rotten Tomatoes Score") ///
    title("Relation Between Box Office Receipts and Rotten Tomatoes")


* OLS regression of box office revenue on Rotten Tomatoes score
regress box_office rottentomatoes

      Source |       SS           df       MS      Number of obs   =       127
-------------+----------------------------------   F(1, 125)       =      0.87
       Model |  8234.53875         1  8234.53875   Prob > F        =    0.3531
    Residual |  1184939.29       125  9479.51432   R-squared       =    0.0069
-------------+----------------------------------   Adj R-squared   =   -0.0010
       Total |  1193173.83       126  9469.63357   Root MSE        =    97.363

-------------------------------------------------------------------------------
   box_office |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
--------------+----------------------------------------------------------------
rottentomat~s |   .2673486   .2868477     0.93   0.353    -.3003586    .8350558
        _cons |   38.37105   18.70257     2.05   0.042     1.356338    75.38576
-------------------------------------------------------------------------------


**** Option 1 -- build the residual by hand
* Step 1: fitted values (linear prediction) from the box office regression
predict yhat_box_office, xb
* Step 2: residual = observed value minus fitted value
generate resid1 = box_office - yhat_box_office

**** Option 2 -- let predict return the residual directly
predict resid2, residuals

**** Takeaway: commands often have options that do the work for you,
**** which saves time if you remember to read the documentation


%head resid1 resid2


* Summarize the box office residuals; summarize stores the maximum in r(max)
sum resid1
* Show the film with the largest residual. The fitted value listed is
* yhat_box_office, the prediction resid1 was computed from; plain yhat holds
* the fitted Metacritic score from the earlier regression.
list film rottentomatoes box_office yhat_box_office resid1 if resid1 == `r(max)'


    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
      resid1 |        127    4.64e-07    96.97567  -64.37617   594.9178


     +-------------------------------------------------------------+
     |           film   rotten~s   box_off~e       yhat     resid1 |
     |-------------------------------------------------------------|
 76. | Jurassic World         71   652.27062   64.35596   594.9178 |
     +-------------------------------------------------------------+


* Bar chart of mean box office revenue, one bar per genre
graph bar (mean) box_office, ///
    over(genre) ///
    ytitle(Box Office in Millions) ///
    graphregion(color(white) fcolor(white))

	film	metacritic	yhat	residuals
1	Paul Blart: Mall Cop 2	13	24.779369	-11.779369
2	Hitman: Agent 47	28	25.978659	2.0213413
3	Hot Pursuit	31	26.578304	4.4216957
4	Fantastic Four	27	27.17795	-.17794991
5	Taken 3	26	27.17795	-1.1779499
6	The Boy Next Door	30	27.777596	2.2224045
7	The Loft	24	28.377239	-4.3772392
8	Unfinished Business	32	28.377239	3.6227608
9	Seventh Son	30	28.976885	1.0231152
10	Mortdecai	27	28.976885	-1.9768848

	resid1	resid2
1	31.383801	31.383801
2	-17.77504	-17.77504
3	-5.929637	-5.9296379
4	15.340361	15.340361
5	48.479237	48.479237
6	-5.0384717	-5.0384712
7	-35.3092	-35.3092
8	-31.092384	-31.092384
9	-23.853451	-23.853449
10	-33.883102	-33.883099

Stata V

Econ 5/Poli 5D Lecture 9

Announcements

Application -- Movie Reviews

Data

What does Linear Regression tell us?

Interpretation of Slope Coefficient

Predicted Value

Model Error (Residuals)

Predict Command

Conclusion