Preliminary Plots

Load in libraries

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(RColorBrewer)

Read in the speed dating data. For the preliminary graphs, each plot uses only the rows that have no missing values (NAs) in the columns it needs.

# Location of the raw speed dating survey export, relative to the working
# directory.
data_path <- "speed_dating_data.csv"

# Load the survey into a base data.frame, then print a per-column summary
# (ranges, quartiles and NA counts) to see which columns need cleaning.
spd_dat <- read.csv(file = data_path)
print(summary(spd_dat))
      iid              id            gender            idg       
 Min.   :  1.0   Min.   : 1.00   Min.   :0.0000   Min.   : 1.00  
 1st Qu.:154.0   1st Qu.: 4.00   1st Qu.:0.0000   1st Qu.: 8.00  
 Median :281.0   Median : 8.00   Median :1.0000   Median :16.00  
 Mean   :283.7   Mean   : 8.96   Mean   :0.5006   Mean   :17.33  
 3rd Qu.:407.0   3rd Qu.:13.00   3rd Qu.:1.0000   3rd Qu.:26.00  
 Max.   :552.0   Max.   :22.00   Max.   :1.0000   Max.   :44.00  
                 NA's   :1                                       
     condtn           wave           round          position     
 Min.   :1.000   Min.   : 1.00   Min.   : 5.00   Min.   : 1.000  
 1st Qu.:2.000   1st Qu.: 7.00   1st Qu.:14.00   1st Qu.: 4.000  
 Median :2.000   Median :11.00   Median :18.00   Median : 8.000  
 Mean   :1.829   Mean   :11.35   Mean   :16.87   Mean   : 9.043  
 3rd Qu.:2.000   3rd Qu.:15.00   3rd Qu.:20.00   3rd Qu.:13.000  
 Max.   :2.000   Max.   :21.00   Max.   :22.00   Max.   :22.000  
                                                                 
    positin1          order           partner            pid       
 Min.   : 1.000   Min.   : 1.000   Min.   : 1.000   Min.   :  1.0  
 1st Qu.: 4.000   1st Qu.: 4.000   1st Qu.: 4.000   1st Qu.:154.0  
 Median : 9.000   Median : 8.000   Median : 8.000   Median :281.0  
 Mean   : 9.296   Mean   : 8.928   Mean   : 8.964   Mean   :283.9  
 3rd Qu.:14.000   3rd Qu.:13.000   3rd Qu.:13.000   3rd Qu.:408.0  
 Max.   :22.000   Max.   :22.000   Max.   :22.000   Max.   :552.0  
 NA's   :1846                                       NA's   :10     
     match           int_corr         samerace          age_o      
 Min.   :0.0000   Min.   :-0.830   Min.   :0.0000   Min.   :18.00  
 1st Qu.:0.0000   1st Qu.:-0.020   1st Qu.:0.0000   1st Qu.:24.00  
 Median :0.0000   Median : 0.210   Median :0.0000   Median :26.00  
 Mean   :0.1647   Mean   : 0.196   Mean   :0.3958   Mean   :26.36  
 3rd Qu.:0.0000   3rd Qu.: 0.430   3rd Qu.:1.0000   3rd Qu.:28.00  
 Max.   :1.0000   Max.   : 0.910   Max.   :1.0000   Max.   :55.00  
                  NA's   :158                       NA's   :104    
     race_o         pf_o_att        pf_o_sin        pf_o_int    
 Min.   :1.000   Min.   :  0.0   Min.   : 0.00   Min.   : 0.00  
 1st Qu.:2.000   1st Qu.: 15.0   1st Qu.:15.00   1st Qu.:17.39  
 Median :2.000   Median : 20.0   Median :18.37   Median :20.00  
 Mean   :2.757   Mean   : 22.5   Mean   :17.40   Mean   :20.27  
 3rd Qu.:4.000   3rd Qu.: 25.0   3rd Qu.:20.00   3rd Qu.:23.81  
 Max.   :6.000   Max.   :100.0   Max.   :60.00   Max.   :50.00  
 NA's   :73      NA's   :89      NA's   :89      NA's   :89     
    pf_o_fun        pf_o_amb        pf_o_sha         dec_o       
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   :0.0000  
 1st Qu.:15.00   1st Qu.: 5.00   1st Qu.: 9.52   1st Qu.:0.0000  
 Median :18.00   Median :10.00   Median :10.64   Median :0.0000  
 Mean   :17.46   Mean   :10.69   Mean   :11.85   Mean   :0.4196  
 3rd Qu.:20.00   3rd Qu.:15.00   3rd Qu.:16.00   3rd Qu.:1.0000  
 Max.   :50.00   Max.   :53.00   Max.   :30.00   Max.   :1.0000  
 NA's   :98      NA's   :107     NA's   :129                     
     attr_o          sinc_o          intel_o           fun_o       
 Min.   : 0.00   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 5.00   1st Qu.: 6.000   1st Qu.: 6.000   1st Qu.: 5.000  
 Median : 6.00   Median : 7.000   Median : 7.000   Median : 7.000  
 Mean   : 6.19   Mean   : 7.175   Mean   : 7.369   Mean   : 6.401  
 3rd Qu.: 8.00   3rd Qu.: 8.000   3rd Qu.: 8.000   3rd Qu.: 8.000  
 Max.   :10.50   Max.   :10.000   Max.   :10.000   Max.   :11.000  
 NA's   :212     NA's   :287      NA's   :306      NA's   :360     
     amb_o            shar_o           like_o           prob_o      
 Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 6.000   1st Qu.: 4.000   1st Qu.: 5.000   1st Qu.: 4.000  
 Median : 7.000   Median : 6.000   Median : 6.000   Median : 5.000  
 Mean   : 6.778   Mean   : 5.475   Mean   : 6.135   Mean   : 5.208  
 3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.: 7.000   3rd Qu.: 7.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :722      NA's   :1076     NA's   :250      NA's   :318     
     met_o           age           field              field_cd     
 Min.   :1.00   Min.   :18.00   Length:8378        Min.   : 1.000  
 1st Qu.:2.00   1st Qu.:24.00   Class :character   1st Qu.: 5.000  
 Median :2.00   Median :26.00   Mode  :character   Median : 8.000  
 Mean   :1.96   Mean   :26.36                      Mean   : 7.662  
 3rd Qu.:2.00   3rd Qu.:28.00                      3rd Qu.:10.000  
 Max.   :8.00   Max.   :55.00                      Max.   :18.000  
 NA's   :385    NA's   :95                         NA's   :82      
   undergra            mn_sat            tuition               race      
 Length:8378        Length:8378        Length:8378        Min.   :1.000  
 Class :character   Class :character   Class :character   1st Qu.:2.000  
 Mode  :character   Mode  :character   Mode  :character   Median :2.000  
                                                          Mean   :2.757  
                                                          3rd Qu.:4.000  
                                                          Max.   :6.000  
                                                          NA's   :63     
    imprace          imprelig          from             zipcode         
 Min.   : 0.000   Min.   : 1.000   Length:8378        Length:8378       
 1st Qu.: 1.000   1st Qu.: 1.000   Class :character   Class :character  
 Median : 3.000   Median : 3.000   Mode  :character   Mode  :character  
 Mean   : 3.785   Mean   : 3.652                                        
 3rd Qu.: 6.000   3rd Qu.: 6.000                                        
 Max.   :10.000   Max.   :10.000                                        
 NA's   :79       NA's   :79                                            
    income               goal            date           go_out     
 Length:8378        Min.   :1.000   Min.   :1.000   Min.   :1.000  
 Class :character   1st Qu.:1.000   1st Qu.:4.000   1st Qu.:1.000  
 Mode  :character   Median :2.000   Median :5.000   Median :2.000  
                    Mean   :2.122   Mean   :5.007   Mean   :2.158  
                    3rd Qu.:2.000   3rd Qu.:6.000   3rd Qu.:3.000  
                    Max.   :6.000   Max.   :7.000   Max.   :7.000  
                    NA's   :79      NA's   :97      NA's   :79     
    career             career_c          sports          tvsports     
 Length:8378        Min.   : 1.000   Min.   : 1.000   Min.   : 1.000  
 Class :character   1st Qu.: 2.000   1st Qu.: 4.000   1st Qu.: 2.000  
 Mode  :character   Median : 6.000   Median : 7.000   Median : 4.000  
                    Mean   : 5.278   Mean   : 6.425   Mean   : 4.575  
                    3rd Qu.: 7.000   3rd Qu.: 9.000   3rd Qu.: 7.000  
                    Max.   :17.000   Max.   :10.000   Max.   :10.000  
                    NA's   :138      NA's   :79       NA's   :79      
    exercise          dining          museums            art        
 Min.   : 1.000   Min.   : 1.000   Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 5.000   1st Qu.: 7.000   1st Qu.: 6.000   1st Qu.: 5.000  
 Median : 6.000   Median : 8.000   Median : 7.000   Median : 7.000  
 Mean   : 6.246   Mean   : 7.784   Mean   : 6.986   Mean   : 6.715  
 3rd Qu.: 8.000   3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 8.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :79       NA's   :79       NA's   :79       NA's   :79      
     hiking           gaming          clubbing         reading      
 Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 1.000  
 1st Qu.: 4.000   1st Qu.: 2.000   1st Qu.: 4.000   1st Qu.: 7.000  
 Median : 6.000   Median : 3.000   Median : 6.000   Median : 8.000  
 Mean   : 5.737   Mean   : 3.881   Mean   : 5.746   Mean   : 7.679  
 3rd Qu.: 8.000   3rd Qu.: 6.000   3rd Qu.: 8.000   3rd Qu.: 9.000  
 Max.   :10.000   Max.   :14.000   Max.   :10.000   Max.   :13.000  
 NA's   :79       NA's   :79       NA's   :79       NA's   :79      
       tv            theater           movies         concerts     
 Min.   : 1.000   Min.   : 0.000   Min.   : 0.00   Min.   : 0.000  
 1st Qu.: 3.000   1st Qu.: 5.000   1st Qu.: 7.00   1st Qu.: 5.000  
 Median : 6.000   Median : 7.000   Median : 8.00   Median : 7.000  
 Mean   : 5.304   Mean   : 6.776   Mean   : 7.92   Mean   : 6.825  
 3rd Qu.: 7.000   3rd Qu.: 9.000   3rd Qu.: 9.00   3rd Qu.: 8.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.00   Max.   :10.000  
 NA's   :79       NA's   :79       NA's   :79      NA's   :79      
     music           shopping           yoga           exphappy     
 Min.   : 1.000   Min.   : 1.000   Min.   : 0.000   Min.   : 1.000  
 1st Qu.: 7.000   1st Qu.: 4.000   1st Qu.: 2.000   1st Qu.: 5.000  
 Median : 8.000   Median : 6.000   Median : 4.000   Median : 6.000  
 Mean   : 7.851   Mean   : 5.631   Mean   : 4.339   Mean   : 5.534  
 3rd Qu.: 9.000   3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.: 7.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :79       NA's   :79       NA's   :79       NA's   :101     
     expnum          attr1_1          sinc1_1         intel1_1    
 Min.   : 0.000   Min.   :  0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.: 2.000   1st Qu.: 15.00   1st Qu.:15.00   1st Qu.:17.39  
 Median : 4.000   Median : 20.00   Median :18.18   Median :20.00  
 Mean   : 5.571   Mean   : 22.51   Mean   :17.40   Mean   :20.27  
 3rd Qu.: 8.000   3rd Qu.: 25.00   3rd Qu.:20.00   3rd Qu.:23.81  
 Max.   :20.000   Max.   :100.00   Max.   :60.00   Max.   :50.00  
 NA's   :6578     NA's   :79       NA's   :79      NA's   :79     
     fun1_1          amb1_1         shar1_1         attr4_1     
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   : 5.00  
 1st Qu.:15.00   1st Qu.: 5.00   1st Qu.: 9.52   1st Qu.:10.00  
 Median :18.00   Median :10.00   Median :10.64   Median :25.00  
 Mean   :17.46   Mean   :10.68   Mean   :11.85   Mean   :26.39  
 3rd Qu.:20.00   3rd Qu.:15.00   3rd Qu.:16.00   3rd Qu.:35.00  
 Max.   :50.00   Max.   :53.00   Max.   :30.00   Max.   :95.00  
 NA's   :89      NA's   :99      NA's   :121     NA's   :1889   
    sinc4_1         intel4_1         fun4_1          amb4_1     
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.: 6.00   1st Qu.: 8.00   1st Qu.:10.00   1st Qu.: 5.00  
 Median :10.00   Median :10.00   Median :15.00   Median :10.00  
 Mean   :11.07   Mean   :12.64   Mean   :15.57   Mean   : 9.78  
 3rd Qu.:15.00   3rd Qu.:16.00   3rd Qu.:20.00   3rd Qu.:15.00  
 Max.   :35.00   Max.   :35.00   Max.   :45.00   Max.   :50.00  
 NA's   :1889    NA's   :1889    NA's   :1889    NA's   :1889   
    shar4_1         attr2_1          sinc2_1         intel2_1    
 Min.   : 0.00   Min.   :  0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.: 7.00   1st Qu.: 20.00   1st Qu.:10.00   1st Qu.:10.00  
 Median :10.00   Median : 25.00   Median :15.00   Median :15.00  
 Mean   :11.01   Mean   : 30.36   Mean   :13.27   Mean   :14.42  
 3rd Qu.:15.00   3rd Qu.: 40.00   3rd Qu.:18.75   3rd Qu.:20.00  
 Max.   :40.00   Max.   :100.00   Max.   :50.00   Max.   :40.00  
 NA's   :1911    NA's   :79       NA's   :79      NA's   :79     
     fun2_1          amb2_1         shar2_1         attr3_1      
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   : 2.000  
 1st Qu.:15.00   1st Qu.: 6.00   1st Qu.:10.00   1st Qu.: 6.000  
 Median :20.00   Median :10.00   Median :10.00   Median : 7.000  
 Mean   :18.42   Mean   :11.74   Mean   :11.85   Mean   : 7.085  
 3rd Qu.:20.00   3rd Qu.:15.00   3rd Qu.:15.63   3rd Qu.: 8.000  
 Max.   :50.00   Max.   :50.00   Max.   :30.00   Max.   :10.000  
 NA's   :79      NA's   :89      NA's   :89      NA's   :105     
    sinc3_1           fun3_1          intel3_1          amb3_1      
 Min.   : 2.000   Min.   : 2.000   Min.   : 3.000   Min.   : 2.000  
 1st Qu.: 8.000   1st Qu.: 7.000   1st Qu.: 8.000   1st Qu.: 7.000  
 Median : 8.000   Median : 8.000   Median : 8.000   Median : 8.000  
 Mean   : 8.295   Mean   : 7.704   Mean   : 8.404   Mean   : 7.578  
 3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 9.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :105      NA's   :105      NA's   :105      NA's   :105     
    attr5_1          sinc5_1          intel5_1          fun5_1      
 Min.   : 2.000   Min.   : 1.000   Min.   : 3.000   Min.   : 2.000  
 1st Qu.: 6.000   1st Qu.: 7.000   1st Qu.: 8.000   1st Qu.: 6.000  
 Median : 7.000   Median : 8.000   Median : 8.000   Median : 8.000  
 Mean   : 6.942   Mean   : 7.927   Mean   : 8.284   Mean   : 7.426  
 3rd Qu.: 8.000   3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 9.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :3472     NA's   :3472     NA's   :3472     NA's   :3472    
     amb5_1            dec              attr            sinc       
 Min.   : 1.000   Min.   :0.0000   Min.   : 0.00   Min.   : 0.000  
 1st Qu.: 7.000   1st Qu.:0.0000   1st Qu.: 5.00   1st Qu.: 6.000  
 Median : 8.000   Median :0.0000   Median : 6.00   Median : 7.000  
 Mean   : 7.618   Mean   :0.4199   Mean   : 6.19   Mean   : 7.175  
 3rd Qu.: 9.000   3rd Qu.:1.0000   3rd Qu.: 8.00   3rd Qu.: 8.000  
 Max.   :10.000   Max.   :1.0000   Max.   :10.00   Max.   :10.000  
 NA's   :3472                      NA's   :202     NA's   :277     
     intel             fun              amb              shar       
 Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
 1st Qu.: 6.000   1st Qu.: 5.000   1st Qu.: 6.000   1st Qu.: 4.000  
 Median : 7.000   Median : 7.000   Median : 7.000   Median : 6.000  
 Mean   : 7.369   Mean   : 6.401   Mean   : 6.777   Mean   : 5.475  
 3rd Qu.: 8.000   3rd Qu.: 8.000   3rd Qu.: 8.000   3rd Qu.: 7.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :296      NA's   :350      NA's   :712      NA's   :1067    
      like             prob             met            match_es     
 Min.   : 0.000   Min.   : 0.000   Min.   :0.0000   Min.   : 0.000  
 1st Qu.: 5.000   1st Qu.: 4.000   1st Qu.:0.0000   1st Qu.: 2.000  
 Median : 6.000   Median : 5.000   Median :0.0000   Median : 3.000  
 Mean   : 6.134   Mean   : 5.208   Mean   :0.9488   Mean   : 3.208  
 3rd Qu.: 7.000   3rd Qu.: 7.000   3rd Qu.:2.0000   3rd Qu.: 4.000  
 Max.   :10.000   Max.   :10.000   Max.   :8.0000   Max.   :18.000  
 NA's   :240      NA's   :309      NA's   :375      NA's   :1173    
    attr1_s         sinc1_s         intel1_s         fun1_s     
 Min.   : 3.00   Min.   : 0.00   Min.   : 0.00   Min.   : 1.00  
 1st Qu.:14.81   1st Qu.:10.00   1st Qu.:10.00   1st Qu.:10.00  
 Median :17.65   Median :15.79   Median :18.42   Median :15.91  
 Mean   :20.79   Mean   :15.43   Mean   :17.24   Mean   :15.26  
 3rd Qu.:25.00   3rd Qu.:20.00   3rd Qu.:20.00   3rd Qu.:20.00  
 Max.   :95.00   Max.   :50.00   Max.   :40.00   Max.   :40.00  
 NA's   :4282    NA's   :4282    NA's   :4282    NA's   :4282   
     amb1_s         shar1_s         attr3_s          sinc3_s      
 Min.   : 0.00   Min.   : 0.00   Min.   : 3.000   Min.   : 1.000  
 1st Qu.: 7.00   1st Qu.: 9.00   1st Qu.: 7.000   1st Qu.: 7.000  
 Median :10.00   Median :12.50   Median : 7.000   Median : 8.000  
 Mean   :11.14   Mean   :12.46   Mean   : 7.211   Mean   : 8.082  
 3rd Qu.:15.00   3rd Qu.:16.28   3rd Qu.: 8.000   3rd Qu.: 9.000  
 Max.   :23.81   Max.   :30.00   Max.   :10.000   Max.   :10.000  
 NA's   :4282    NA's   :4282    NA's   :4378     NA's   :4378    
    intel3_s          fun3_s           amb3_s          satis_2      
 Min.   : 4.000   Min.   : 3.000   Min.   : 2.000   Min.   : 1.000  
 1st Qu.: 8.000   1st Qu.: 7.000   1st Qu.: 7.000   1st Qu.: 5.000  
 Median : 8.000   Median : 8.000   Median : 8.000   Median : 6.000  
 Mean   : 8.258   Mean   : 7.692   Mean   : 7.589   Mean   : 5.712  
 3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 7.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :4378     NA's   :4378     NA's   :4378     NA's   :915     
     length         numdat_2        attr7_2         sinc7_2     
 Min.   :1.000   Min.   :1.000   Min.   :10.00   Min.   : 0.00  
 1st Qu.:1.000   1st Qu.:2.000   1st Qu.:20.00   1st Qu.:10.00  
 Median :1.000   Median :2.000   Median :30.00   Median :10.00  
 Mean   :1.843   Mean   :2.338   Mean   :32.82   Mean   :13.53  
 3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:40.00   3rd Qu.:20.00  
 Max.   :3.000   Max.   :3.000   Max.   :80.00   Max.   :40.00  
 NA's   :915     NA's   :945     NA's   :6394    NA's   :6423   
    intel7_2         fun7_2          amb7_2          shar7_2     
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.000   Min.   : 0.00  
 1st Qu.:10.00   1st Qu.:10.00   1st Qu.: 0.000   1st Qu.: 5.00  
 Median :15.00   Median :20.00   Median : 5.000   Median :10.00  
 Mean   :15.29   Mean   :18.87   Mean   : 7.287   Mean   :12.16  
 3rd Qu.:20.00   3rd Qu.:24.00   3rd Qu.:10.000   3rd Qu.:20.00  
 Max.   :50.00   Max.   :50.00   Max.   :20.000   Max.   :40.00  
 NA's   :6394    NA's   :6394    NA's   :6423     NA's   :6404   
    attr1_2         sinc1_2         intel1_2         fun1_2     
 Min.   : 5.00   Min.   : 0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.:16.67   1st Qu.:10.00   1st Qu.:15.00   1st Qu.:15.00  
 Median :20.00   Median :16.67   Median :19.05   Median :18.37  
 Mean   :26.22   Mean   :15.87   Mean   :17.81   Mean   :17.65  
 3rd Qu.:30.00   3rd Qu.:20.00   3rd Qu.:20.00   3rd Qu.:20.00  
 Max.   :85.00   Max.   :50.00   Max.   :40.00   Max.   :50.00  
 NA's   :933     NA's   :915     NA's   :915     NA's   :915    
     amb1_2          shar1_2         attr4_2          sinc4_2     
 Min.   : 0.000   Min.   : 0.00   Min.   :  6.00   Min.   : 0.00  
 1st Qu.: 5.000   1st Qu.:10.00   1st Qu.: 10.00   1st Qu.: 8.00  
 Median :10.000   Median :13.00   Median : 25.00   Median :10.00  
 Mean   : 9.913   Mean   :12.76   Mean   : 26.81   Mean   :11.93  
 3rd Qu.:15.000   3rd Qu.:16.67   3rd Qu.: 40.00   3rd Qu.:15.00  
 Max.   :22.220   Max.   :35.00   Max.   :100.00   Max.   :35.00  
 NA's   :915      NA's   :915     NA's   :2603     NA's   :2603   
    intel4_2        fun4_2          amb4_2          shar4_2     
 Min.   : 0.0   Min.   : 0.00   Min.   : 0.000   Min.   : 0.00  
 1st Qu.: 8.0   1st Qu.: 9.00   1st Qu.: 5.000   1st Qu.: 7.00  
 Median :10.0   Median :15.00   Median :10.000   Median :10.00  
 Mean   :12.1   Mean   :15.16   Mean   : 9.342   Mean   :11.32  
 3rd Qu.:15.0   3rd Qu.:20.00   3rd Qu.:10.000   3rd Qu.:15.00  
 Max.   :40.0   Max.   :50.00   Max.   :35.000   Max.   :40.00  
 NA's   :2603   NA's   :2603    NA's   :2603     NA's   :2603   
    attr2_2         sinc2_2         intel2_2         fun2_2     
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.:19.15   1st Qu.:10.00   1st Qu.:10.00   1st Qu.:15.00  
 Median :25.00   Median :15.00   Median :15.00   Median :18.52  
 Mean   :29.34   Mean   :13.90   Mean   :13.96   Mean   :17.97  
 3rd Qu.:38.46   3rd Qu.:19.23   3rd Qu.:17.39   3rd Qu.:20.00  
 Max.   :85.00   Max.   :40.00   Max.   :30.77   Max.   :40.00  
 NA's   :2603    NA's   :2603    NA's   :2603    NA's   :2603   
     amb2_2         shar2_2         attr3_2          sinc3_2      
 Min.   : 0.00   Min.   : 0.00   Min.   : 2.000   Min.   : 2.000  
 1st Qu.:10.00   1st Qu.:10.00   1st Qu.: 7.000   1st Qu.: 7.000  
 Median :10.00   Median :13.95   Median : 7.000   Median : 8.000  
 Mean   :11.91   Mean   :12.89   Mean   : 7.125   Mean   : 7.931  
 3rd Qu.:15.09   3rd Qu.:16.52   3rd Qu.: 8.000   3rd Qu.: 9.000  
 Max.   :50.00   Max.   :30.00   Max.   :10.000   Max.   :10.000  
 NA's   :2603    NA's   :2603    NA's   :915      NA's   :915     
    intel3_2          fun3_2           amb3_2          attr5_2      
 Min.   : 4.000   Min.   : 1.000   Min.   : 2.000   Min.   : 2.000  
 1st Qu.: 8.000   1st Qu.: 7.000   1st Qu.: 7.000   1st Qu.: 6.000  
 Median : 8.000   Median : 8.000   Median : 8.000   Median : 7.000  
 Mean   : 8.239   Mean   : 7.602   Mean   : 7.487   Mean   : 6.828  
 3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 8.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :915      NA's   :915      NA's   :915      NA's   :4001    
    sinc5_2          intel5_2          fun5_2           amb5_2      
 Min.   : 2.000   Min.   : 2.000   Min.   : 2.000   Min.   : 2.000  
 1st Qu.: 6.000   1st Qu.: 7.000   1st Qu.: 6.000   1st Qu.: 6.000  
 Median : 8.000   Median : 8.000   Median : 7.000   Median : 7.000  
 Mean   : 7.394   Mean   : 7.839   Mean   : 7.279   Mean   : 7.332  
 3rd Qu.: 8.000   3rd Qu.: 9.000   3rd Qu.: 8.000   3rd Qu.: 8.000  
 Max.   :10.000   Max.   :10.000   Max.   :10.000   Max.   :10.000  
 NA's   :4001     NA's   :4001     NA's   :4001     NA's   :4001    
    you_call         them_cal         date_3         numdat_3    
 Min.   : 0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
 1st Qu.: 0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:1.000  
 Median : 0.000   Median :1.000   Median :0.000   Median :1.000  
 Mean   : 0.781   Mean   :0.982   Mean   :0.377   Mean   :1.231  
 3rd Qu.: 1.000   3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000  
 Max.   :21.000   Max.   :9.000   Max.   :1.000   Max.   :9.000  
 NA's   :4404     NA's   :4404    NA's   :4404    NA's   :6882   
    num_in_3        attr1_3         sinc1_3         intel1_3    
 Min.   :0.000   Min.   : 0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.:1.000   1st Qu.:15.22   1st Qu.:10.00   1st Qu.:16.67  
 Median :1.000   Median :20.00   Median :16.67   Median :20.00  
 Mean   :0.934   Mean   :24.39   Mean   :16.59   Mean   :19.41  
 3rd Qu.:1.000   3rd Qu.:30.00   3rd Qu.:20.00   3rd Qu.:20.00  
 Max.   :4.000   Max.   :80.00   Max.   :65.00   Max.   :45.00  
 NA's   :7710    NA's   :4404    NA's   :4404    NA's   :4404   
     fun1_3          amb1_3        shar1_3         attr7_3         sinc7_3     
 Min.   : 0.00   Min.   : 0.0   Min.   : 0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.:14.81   1st Qu.: 5.0   1st Qu.:10.00   1st Qu.:20.00   1st Qu.:10.00  
 Median :16.33   Median :10.0   Median :14.29   Median :25.00   Median :15.00  
 Mean   :16.23   Mean   :10.9   Mean   :12.70   Mean   :31.33   Mean   :15.65  
 3rd Qu.:20.00   3rd Qu.:15.0   3rd Qu.:16.67   3rd Qu.:40.00   3rd Qu.:20.00  
 Max.   :30.00   Max.   :30.0   Max.   :55.00   Max.   :80.00   Max.   :60.00  
 NA's   :4404    NA's   :4404   NA's   :4404    NA's   :6362    NA's   :6362   
    intel7_3         fun7_3          amb7_3          shar7_3     
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.000   Min.   : 0.00  
 1st Qu.:10.00   1st Qu.:10.00   1st Qu.: 0.000   1st Qu.: 5.00  
 Median :18.00   Median :17.00   Median :10.000   Median :10.00  
 Mean   :16.68   Mean   :16.42   Mean   : 7.824   Mean   :12.21  
 3rd Qu.:20.00   3rd Qu.:20.00   3rd Qu.:10.000   3rd Qu.:20.00  
 Max.   :45.00   Max.   :40.00   Max.   :30.000   Max.   :55.00  
 NA's   :6362    NA's   :6362    NA's   :6362     NA's   :6362   
    attr4_3         sinc4_3         intel4_3         fun4_3     
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   : 0.00  
 1st Qu.:10.00   1st Qu.: 7.00   1st Qu.: 7.00   1st Qu.: 9.00  
 Median :20.00   Median :10.00   Median :10.00   Median :12.00  
 Mean   :25.61   Mean   :10.75   Mean   :11.53   Mean   :14.28  
 3rd Qu.:37.00   3rd Qu.:15.00   3rd Qu.:15.00   3rd Qu.:20.00  
 Max.   :80.00   Max.   :40.00   Max.   :30.00   Max.   :30.00  
 NA's   :5419    NA's   :5419    NA's   :5419    NA's   :5419   
     amb4_3          shar4_3         attr2_3         sinc2_3     
 Min.   : 0.000   Min.   : 0.00   Min.   : 5.00   Min.   : 0.00  
 1st Qu.: 5.000   1st Qu.: 7.00   1st Qu.:10.00   1st Qu.: 7.00  
 Median : 9.000   Median :10.00   Median :20.00   Median :10.00  
 Mean   : 9.208   Mean   :11.25   Mean   :24.97   Mean   :10.92  
 3rd Qu.:10.000   3rd Qu.:15.00   3rd Qu.:35.00   3rd Qu.:15.00  
 Max.   :40.000   Max.   :45.00   Max.   :80.00   Max.   :50.00  
 NA's   :5419     NA's   :5419    NA's   :5419    NA's   :5419   
    intel2_3         fun2_3          amb2_3          shar2_3     
 Min.   : 0.00   Min.   : 0.00   Min.   : 0.000   Min.   : 0.00  
 1st Qu.: 7.00   1st Qu.: 9.00   1st Qu.: 6.000   1st Qu.: 5.00  
 Median :10.00   Median :15.00   Median :10.000   Median :10.00  
 Mean   :11.95   Mean   :14.96   Mean   : 9.526   Mean   :11.97  
 3rd Qu.:15.00   3rd Qu.:20.00   3rd Qu.:10.000   3rd Qu.:15.00  
 Max.   :60.00   Max.   :40.00   Max.   :50.000   Max.   :45.00  
 NA's   :5419    NA's   :5419    NA's   :5419     NA's   :6362   
    attr3_3         sinc3_3          intel3_3          fun3_3      
 Min.   : 2.00   Min.   : 2.000   Min.   : 3.000   Min.   : 2.000  
 1st Qu.: 7.00   1st Qu.: 7.000   1st Qu.: 8.000   1st Qu.: 7.000  
 Median : 7.00   Median : 8.000   Median : 8.000   Median : 8.000  
 Mean   : 7.24   Mean   : 8.093   Mean   : 8.389   Mean   : 7.659  
 3rd Qu.: 8.00   3rd Qu.: 9.000   3rd Qu.: 9.000   3rd Qu.: 9.000  
 Max.   :12.00   Max.   :12.000   Max.   :12.000   Max.   :12.000  
 NA's   :4404    NA's   :4404     NA's   :4404     NA's   :4404    
     amb3_3          attr5_3         sinc5_3          intel5_3     
 Min.   : 1.000   Min.   : 2.00   Min.   : 2.000   Min.   : 4.000  
 1st Qu.: 6.000   1st Qu.: 6.00   1st Qu.: 7.000   1st Qu.: 7.000  
 Median : 8.000   Median : 7.00   Median : 8.000   Median : 8.000  
 Mean   : 7.392   Mean   : 6.81   Mean   : 7.615   Mean   : 7.933  
 3rd Qu.: 9.000   3rd Qu.: 8.00   3rd Qu.: 9.000   3rd Qu.: 9.000  
 Max.   :12.000   Max.   :10.00   Max.   :10.000   Max.   :10.000  
 NA's   :4404     NA's   :6362    NA's   :6362     NA's   :6362    
     fun5_3           amb5_3      
 Min.   : 1.000   Min.   : 1.000  
 1st Qu.: 6.000   1st Qu.: 6.000  
 Median : 7.000   Median : 7.000  
 Mean   : 7.155   Mean   : 7.049  
 3rd Qu.: 8.000   3rd Qu.: 8.000  
 Max.   :10.000   Max.   :10.000  
 NA's   :6362     NA's   :6362    

Line Plot

# Build the data for the line plot: the mean importance given to shared
# interests at each survey time, computed separately for non-matches
# (match == 0) and matches (match == 1).
#
# `survey_cols` maps each time label to the spd_dat column holding that
# survey's shared-interest score.
# NOTE(review): "Weeks After" is mapped to shar1_2; spd_dat also has a
# shar1_3 column -- confirm against the dataset codebook which column is the
# weeks-after survey.
survey_cols <- c(
  "Before" = "shar1_1",
  "During" = "shar1_s",
  "Days After" = "shar7_2",
  "Weeks After" = "shar1_2"
)
match_values <- c(0, 1)

# One row per (time, match) pair, all times for match 0 first, then match 1.
time_labels <- rep(names(survey_cols), times = length(match_values))
match_flags <- rep(match_values, each = length(survey_cols))

spddat_plot1 <- data.frame(
  time = time_labels,
  match = match_flags,
  shar_imp = mapply(
    function(score_col, match_value) {
      # Missing scores are skipped so they do not turn the mean into NA.
      mean(spd_dat[[score_col]][spd_dat$match == match_value], na.rm = TRUE)
    },
    survey_cols[time_labels],
    match_flags,
    USE.NAMES = FALSE
  )
)
spddat_plot1
         time match shar_imp
1      Before     0 11.98161
2      During     0 12.54435
3  Days After     0 12.37751
4 Weeks After     0 12.82038
5      Before     1 11.15288
6      During     1 12.01364
7  Days After     1 11.04863
8 Weeks After     1 12.46087

Line chart over time with colors for different categories

# Line chart of the mean importance of shared interests at each survey time,
# with one coloured line per match outcome.
ggplot(
  spddat_plot1,
  aes(x = time, y = shar_imp, group = match, colour = as.character(match))
) +
  geom_line() +
  geom_point() +
  # `time` holds text labels, so without explicit limits the discrete x axis
  # is sorted alphabetically ("Before", "Days After", "During", "Weeks After")
  # and the lines join the points out of chronological order.
  scale_x_discrete(
    limits = c("Before", "During", "Days After", "Weeks After")
  ) +
  scale_colour_discrete(
    labels = c("No Match", "Match"),
    type = c("#7b3494", "#008837")
  ) +
  labs(
    title = "Importance of Shared Interests over Time Surveyed",
    colour = "Match"
  ) +
  xlab("Time surveyed") +
  ylab("Importance of shared interests") +
  theme_light() +
  theme(text = element_text(family = "Avenir")) +
  ylim(c(11, 13))

Smoothed Line Chart

Line chart of order vs. amount liked with match and gender

# Keep only the columns needed for the smoothed chart and drop every row that
# has a missing value in any of them (the `like` column has 240 NAs in the
# full data set), then print a summary of what remains.
plot2_cols <- c("order", "like", "match", "gender")
spddat_plot2 <- na.omit(spd_dat[plot2_cols])
print(summary(spddat_plot2))
     order             like            match            gender      
 Min.   : 1.000   Min.   : 0.000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.: 4.000   1st Qu.: 5.000   1st Qu.:0.0000   1st Qu.:0.0000  
 Median : 8.000   Median : 6.000   Median :0.0000   Median :1.0000  
 Mean   : 8.843   Mean   : 6.134   Mean   :0.1691   Mean   :0.5025  
 3rd Qu.:13.000   3rd Qu.: 7.000   3rd Qu.:0.0000   3rd Qu.:1.0000  
 Max.   :22.000   Max.   :10.000   Max.   :1.0000   Max.   :1.0000  
# Smoothed trend of how much a participant liked their partner against the
# order in which they met, with one curve per match/gender combination:
# colour encodes match outcome, line type encodes gender.
ggplot(
  spddat_plot2,
  aes(
    x = order,
    y = like,
    group = interaction(match, gender),
    color = as.character(match)
  )
) +
  geom_smooth(aes(linetype = as.character(gender))) +
  # Raw points can be overlaid with: geom_point(position = "jitter")
  scale_colour_discrete(
    type = c("#7b3494", "#008837"),
    labels = c("No Match", "Match")
  ) +
  scale_linetype(labels = c("Female", "Male")) +
  # ylim() sets scale limits, so ratings outside 1-10 are removed before the
  # smoother is fitted (ggplot2 reports them as removed non-finite values).
  ylim(1, 10) +
  labs(
    title = "Amount liked match vs. order met",
    x = "Order met in night",
    y = "Amount liked match",
    colour = "Match",
    linetype = "Gender"
  ) +
  theme_light() +
  theme(text = element_text(family = "Avenir"))
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 8 rows containing non-finite values (stat_smooth).

Bubble Plot

# Columns used by the two bubble plots.
spddat_plot3 <- spd_dat[
  c("income", "gender", "mn_sat", "age", "field_cd", "like")
]

# income and mn_sat were read as text (see the summary of spd_dat); strip the
# commas so the values can be converted to numbers.
comma_cols <- c("income", "mn_sat")
spddat_plot3[comma_cols] <- lapply(
  spddat_plot3[comma_cols],
  function(raw_text) as.numeric(gsub(",", "", raw_text))
)

# Drop rows with a missing value in any selected column and summarise.
spddat_plot3 <- na.omit(spddat_plot3)
print(summary(spddat_plot3))
     income           gender           mn_sat          age       
 Min.   :  8607   Min.   :0.0000   Min.   : 914   Min.   :21.00  
 1st Qu.: 35187   1st Qu.:0.0000   1st Qu.:1210   1st Qu.:24.00  
 Median : 46272   Median :0.0000   Median :1310   Median :26.00  
 Mean   : 46838   Mean   :0.3431   Mean   :1297   Mean   :26.32  
 3rd Qu.: 55080   3rd Qu.:1.0000   3rd Qu.:1400   3rd Qu.:28.00  
 Max.   :106663   Max.   :1.0000   Max.   :1470   Max.   :55.00  
    field_cd           like       
 Min.   : 1.000   Min.   : 0.000  
 1st Qu.: 5.000   1st Qu.: 5.000  
 Median : 8.000   Median : 6.000  
 Mean   : 7.879   Mean   : 6.006  
 3rd Qu.:11.000   3rd Qu.: 7.000  
 Max.   :18.000   Max.   :10.000  
# Drop exact duplicate rows so each distinct combination is drawn only once.
spddat_plot3 <- distinct(spddat_plot3)

# Bubble plot: income (log scale) against median SAT score, coloured by
# gender, with bubble size showing age.
ggplot(
  spddat_plot3,
  aes(x = mn_sat, y = income, group = gender, color = as.character(gender))
) +
  geom_point(aes(size = age), alpha = 0.7) +
  scale_y_log10() +
  scale_size(breaks = c(22, 30, 40, 55), range = c(2, 7)) +
  scale_colour_discrete(
    type = c("#7b3494", "#008837"),
    labels = c("Female", "Male")
  ) +
  labs(
    title = "Demographics by Home Area",
    x = "Median SAT Score",
    y = "Income of Home Area",
    colour = "Gender",
    size = "Age"
  ) +
  theme_light() +
  theme(text = element_text(family = "Avenir"))

# Bubble plot: income (log scale) for each field-of-study code, coloured by
# gender, with bubble size showing how much the partner was liked.
ggplot(
  spddat_plot3,
  # field_cd is an integer code (1-18 in this data), not a quantity, so it is
  # mapped as a factor: every code gets its own tick and the axis no longer
  # shows meaningless in-between values.
  # NOTE(review): assumes field_cd is a category code for `field` -- confirm
  # against the dataset codebook.
  aes(
    x = factor(field_cd),
    y = income,
    group = gender,
    color = as.character(gender)
  )
) +
  geom_point(aes(size = like), alpha = 0.7) +
  scale_y_log10() +
  scale_colour_discrete(
    labels = c("Female", "Male"),
    type = c("#7b3494", "#008837")
  ) +
  scale_size(range = c(0.25, 6)) +
  # NOTE(review): the title is an empty string -- fill it in or drop it.
  labs(
    title = "",
    colour = "Gender",
    size = "Amount Liked"
  ) +
  xlab("Field of Major") +
  ylab("Income of Home Area") +
  theme_light() +
  theme(text = element_text(family = "Avenir"))

Interest Correlation Plots

# Data for the interest-correlation plots: keep the four relevant columns,
# drop rows with a missing value in any of them, and print a summary.
sdp4_cols <- c("int_corr", "shar1_1", "match", "wave")
sdp4 <- na.omit(spd_dat[sdp4_cols])
print(summary(sdp4))
    int_corr          shar1_1          match             wave     
 Min.   :-0.8300   Min.   : 0.00   Min.   :0.0000   Min.   : 1.0  
 1st Qu.:-0.0100   1st Qu.: 9.52   1st Qu.:0.0000   1st Qu.: 7.0  
 Median : 0.2100   Median :10.64   Median :0.0000   Median :11.0  
 Mean   : 0.1974   Mean   :11.85   Mean   :0.1645   Mean   :11.4  
 3rd Qu.: 0.4300   3rd Qu.:16.00   3rd Qu.:0.0000   3rd Qu.:15.0  
 Max.   : 0.9100   Max.   :30.00   Max.   :1.0000   Max.   :21.0  
# Scatter plot of interest correlation against the stated importance of
# shared interests, coloured by match outcome, with a smoothed trend per
# outcome and a reference line at zero correlation drawn on top.
ggplot(
  sdp4,
  aes(
    x = shar1_1,
    y = int_corr,
    colour = as.character(match),
    group = match
  )
) +
  geom_point(alpha = 0.6) +
  geom_smooth() +
  geom_hline(aes(yintercept = 0)) +
  scale_colour_discrete(
    type = c("#7b3494", "#008837"),
    labels = c("No Match", "Match")
  ) +
  labs(
    title = "Interest Correlation and Importance of Shared Interest",
    x = "Importance of Shared Interests",
    y = "Interest Correlation",
    colour = "Match"
  ) +
  theme_light() +
  theme(text = element_text(family = "Avenir"))
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Same scatter as above, but each point is coloured by its own interest
# correlation on a diverging palette; the legend is hidden and a pale
# reference line marks zero correlation.
ggplot(sdp4, aes(x = shar1_1, y = int_corr, colour = int_corr)) +
  geom_point() +
  geom_hline(aes(yintercept = 0), color = "#ffffbf") +
  # Alternative diverging palette: "PRGn"
  scale_colour_distiller(palette = "RdYlGn") +
  labs(
    title = "Interest Correlation and Importance of Shared Interest",
    x = "Importance of Shared Interests",
    y = "Interest Correlation"
  ) +
  theme_grey() +
  theme(
    legend.position = "none",
    text = element_text(family = "Avenir")
  )