Load in libraries
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6 ✔ purrr 0.3.4
✔ tibble 3.1.8 ✔ dplyr 1.0.10
✔ tidyr 1.2.1 ✔ stringr 1.4.1
✔ readr 2.1.2 ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
Read in the speed dating data (the preliminary graphs use only rows without missing values)
# Path of the CSV file holding the raw speed dating data.
data_path <- "speed_dating_data.csv"

# Load the file into a data frame, then print per-column summary statistics.
spd_dat <- read.csv(file = data_path)
summary(object = spd_dat)
iid id gender idg
Min. : 1.0 Min. : 1.00 Min. :0.0000 Min. : 1.00
1st Qu.:154.0 1st Qu.: 4.00 1st Qu.:0.0000 1st Qu.: 8.00
Median :281.0 Median : 8.00 Median :1.0000 Median :16.00
Mean :283.7 Mean : 8.96 Mean :0.5006 Mean :17.33
3rd Qu.:407.0 3rd Qu.:13.00 3rd Qu.:1.0000 3rd Qu.:26.00
Max. :552.0 Max. :22.00 Max. :1.0000 Max. :44.00
NA's :1
condtn wave round position
Min. :1.000 Min. : 1.00 Min. : 5.00 Min. : 1.000
1st Qu.:2.000 1st Qu.: 7.00 1st Qu.:14.00 1st Qu.: 4.000
Median :2.000 Median :11.00 Median :18.00 Median : 8.000
Mean :1.829 Mean :11.35 Mean :16.87 Mean : 9.043
3rd Qu.:2.000 3rd Qu.:15.00 3rd Qu.:20.00 3rd Qu.:13.000
Max. :2.000 Max. :21.00 Max. :22.00 Max. :22.000
positin1 order partner pid
Min. : 1.000 Min. : 1.000 Min. : 1.000 Min. : 1.0
1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.: 4.000 1st Qu.:154.0
Median : 9.000 Median : 8.000 Median : 8.000 Median :281.0
Mean : 9.296 Mean : 8.928 Mean : 8.964 Mean :283.9
3rd Qu.:14.000 3rd Qu.:13.000 3rd Qu.:13.000 3rd Qu.:408.0
Max. :22.000 Max. :22.000 Max. :22.000 Max. :552.0
NA's :1846 NA's :10
match int_corr samerace age_o
Min. :0.0000 Min. :-0.830 Min. :0.0000 Min. :18.00
1st Qu.:0.0000 1st Qu.:-0.020 1st Qu.:0.0000 1st Qu.:24.00
Median :0.0000 Median : 0.210 Median :0.0000 Median :26.00
Mean :0.1647 Mean : 0.196 Mean :0.3958 Mean :26.36
3rd Qu.:0.0000 3rd Qu.: 0.430 3rd Qu.:1.0000 3rd Qu.:28.00
Max. :1.0000 Max. : 0.910 Max. :1.0000 Max. :55.00
NA's :158 NA's :104
race_o pf_o_att pf_o_sin pf_o_int
Min. :1.000 Min. : 0.0 Min. : 0.00 Min. : 0.00
1st Qu.:2.000 1st Qu.: 15.0 1st Qu.:15.00 1st Qu.:17.39
Median :2.000 Median : 20.0 Median :18.37 Median :20.00
Mean :2.757 Mean : 22.5 Mean :17.40 Mean :20.27
3rd Qu.:4.000 3rd Qu.: 25.0 3rd Qu.:20.00 3rd Qu.:23.81
Max. :6.000 Max. :100.0 Max. :60.00 Max. :50.00
NA's :73 NA's :89 NA's :89 NA's :89
pf_o_fun pf_o_amb pf_o_sha dec_o
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
1st Qu.:15.00 1st Qu.: 5.00 1st Qu.: 9.52 1st Qu.:0.0000
Median :18.00 Median :10.00 Median :10.64 Median :0.0000
Mean :17.46 Mean :10.69 Mean :11.85 Mean :0.4196
3rd Qu.:20.00 3rd Qu.:15.00 3rd Qu.:16.00 3rd Qu.:1.0000
Max. :50.00 Max. :53.00 Max. :30.00 Max. :1.0000
NA's :98 NA's :107 NA's :129
attr_o sinc_o intel_o fun_o
Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 5.00 1st Qu.: 6.000 1st Qu.: 6.000 1st Qu.: 5.000
Median : 6.00 Median : 7.000 Median : 7.000 Median : 7.000
Mean : 6.19 Mean : 7.175 Mean : 7.369 Mean : 6.401
3rd Qu.: 8.00 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 8.000
Max. :10.50 Max. :10.000 Max. :10.000 Max. :11.000
NA's :212 NA's :287 NA's :306 NA's :360
amb_o shar_o like_o prob_o
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 6.000 1st Qu.: 4.000 1st Qu.: 5.000 1st Qu.: 4.000
Median : 7.000 Median : 6.000 Median : 6.000 Median : 5.000
Mean : 6.778 Mean : 5.475 Mean : 6.135 Mean : 5.208
3rd Qu.: 8.000 3rd Qu.: 7.000 3rd Qu.: 7.000 3rd Qu.: 7.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :722 NA's :1076 NA's :250 NA's :318
met_o age field field_cd
Min. :1.00 Min. :18.00 Length:8378 Min. : 1.000
1st Qu.:2.00 1st Qu.:24.00 Class :character 1st Qu.: 5.000
Median :2.00 Median :26.00 Mode :character Median : 8.000
Mean :1.96 Mean :26.36 Mean : 7.662
3rd Qu.:2.00 3rd Qu.:28.00 3rd Qu.:10.000
Max. :8.00 Max. :55.00 Max. :18.000
NA's :385 NA's :95 NA's :82
undergra mn_sat tuition race
Length:8378 Length:8378 Length:8378 Min. :1.000
Class :character Class :character Class :character 1st Qu.:2.000
Mode :character Mode :character Mode :character Median :2.000
Mean :2.757
3rd Qu.:4.000
Max. :6.000
NA's :63
imprace imprelig from zipcode
Min. : 0.000 Min. : 1.000 Length:8378 Length:8378
1st Qu.: 1.000 1st Qu.: 1.000 Class :character Class :character
Median : 3.000 Median : 3.000 Mode :character Mode :character
Mean : 3.785 Mean : 3.652
3rd Qu.: 6.000 3rd Qu.: 6.000
Max. :10.000 Max. :10.000
NA's :79 NA's :79
income goal date go_out
Length:8378 Min. :1.000 Min. :1.000 Min. :1.000
Class :character 1st Qu.:1.000 1st Qu.:4.000 1st Qu.:1.000
Mode :character Median :2.000 Median :5.000 Median :2.000
Mean :2.122 Mean :5.007 Mean :2.158
3rd Qu.:2.000 3rd Qu.:6.000 3rd Qu.:3.000
Max. :6.000 Max. :7.000 Max. :7.000
NA's :79 NA's :97 NA's :79
career career_c sports tvsports
Length:8378 Min. : 1.000 Min. : 1.000 Min. : 1.000
Class :character 1st Qu.: 2.000 1st Qu.: 4.000 1st Qu.: 2.000
Mode :character Median : 6.000 Median : 7.000 Median : 4.000
Mean : 5.278 Mean : 6.425 Mean : 4.575
3rd Qu.: 7.000 3rd Qu.: 9.000 3rd Qu.: 7.000
Max. :17.000 Max. :10.000 Max. :10.000
NA's :138 NA's :79 NA's :79
exercise dining museums art
Min. : 1.000 Min. : 1.000 Min. : 0.000 Min. : 0.000
1st Qu.: 5.000 1st Qu.: 7.000 1st Qu.: 6.000 1st Qu.: 5.000
Median : 6.000 Median : 8.000 Median : 7.000 Median : 7.000
Mean : 6.246 Mean : 7.784 Mean : 6.986 Mean : 6.715
3rd Qu.: 8.000 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 8.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :79 NA's :79 NA's :79 NA's :79
hiking gaming clubbing reading
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 1.000
1st Qu.: 4.000 1st Qu.: 2.000 1st Qu.: 4.000 1st Qu.: 7.000
Median : 6.000 Median : 3.000 Median : 6.000 Median : 8.000
Mean : 5.737 Mean : 3.881 Mean : 5.746 Mean : 7.679
3rd Qu.: 8.000 3rd Qu.: 6.000 3rd Qu.: 8.000 3rd Qu.: 9.000
Max. :10.000 Max. :14.000 Max. :10.000 Max. :13.000
NA's :79 NA's :79 NA's :79 NA's :79
tv theater movies concerts
Min. : 1.000 Min. : 0.000 Min. : 0.00 Min. : 0.000
1st Qu.: 3.000 1st Qu.: 5.000 1st Qu.: 7.00 1st Qu.: 5.000
Median : 6.000 Median : 7.000 Median : 8.00 Median : 7.000
Mean : 5.304 Mean : 6.776 Mean : 7.92 Mean : 6.825
3rd Qu.: 7.000 3rd Qu.: 9.000 3rd Qu.: 9.00 3rd Qu.: 8.000
Max. :10.000 Max. :10.000 Max. :10.00 Max. :10.000
NA's :79 NA's :79 NA's :79 NA's :79
music shopping yoga exphappy
Min. : 1.000 Min. : 1.000 Min. : 0.000 Min. : 1.000
1st Qu.: 7.000 1st Qu.: 4.000 1st Qu.: 2.000 1st Qu.: 5.000
Median : 8.000 Median : 6.000 Median : 4.000 Median : 6.000
Mean : 7.851 Mean : 5.631 Mean : 4.339 Mean : 5.534
3rd Qu.: 9.000 3rd Qu.: 8.000 3rd Qu.: 7.000 3rd Qu.: 7.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :79 NA's :79 NA's :79 NA's :101
expnum attr1_1 sinc1_1 intel1_1
Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 2.000 1st Qu.: 15.00 1st Qu.:15.00 1st Qu.:17.39
Median : 4.000 Median : 20.00 Median :18.18 Median :20.00
Mean : 5.571 Mean : 22.51 Mean :17.40 Mean :20.27
3rd Qu.: 8.000 3rd Qu.: 25.00 3rd Qu.:20.00 3rd Qu.:23.81
Max. :20.000 Max. :100.00 Max. :60.00 Max. :50.00
NA's :6578 NA's :79 NA's :79 NA's :79
fun1_1 amb1_1 shar1_1 attr4_1
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 5.00
1st Qu.:15.00 1st Qu.: 5.00 1st Qu.: 9.52 1st Qu.:10.00
Median :18.00 Median :10.00 Median :10.64 Median :25.00
Mean :17.46 Mean :10.68 Mean :11.85 Mean :26.39
3rd Qu.:20.00 3rd Qu.:15.00 3rd Qu.:16.00 3rd Qu.:35.00
Max. :50.00 Max. :53.00 Max. :30.00 Max. :95.00
NA's :89 NA's :99 NA's :121 NA's :1889
sinc4_1 intel4_1 fun4_1 amb4_1
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 6.00 1st Qu.: 8.00 1st Qu.:10.00 1st Qu.: 5.00
Median :10.00 Median :10.00 Median :15.00 Median :10.00
Mean :11.07 Mean :12.64 Mean :15.57 Mean : 9.78
3rd Qu.:15.00 3rd Qu.:16.00 3rd Qu.:20.00 3rd Qu.:15.00
Max. :35.00 Max. :35.00 Max. :45.00 Max. :50.00
NA's :1889 NA's :1889 NA's :1889 NA's :1889
shar4_1 attr2_1 sinc2_1 intel2_1
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.: 7.00 1st Qu.: 20.00 1st Qu.:10.00 1st Qu.:10.00
Median :10.00 Median : 25.00 Median :15.00 Median :15.00
Mean :11.01 Mean : 30.36 Mean :13.27 Mean :14.42
3rd Qu.:15.00 3rd Qu.: 40.00 3rd Qu.:18.75 3rd Qu.:20.00
Max. :40.00 Max. :100.00 Max. :50.00 Max. :40.00
NA's :1911 NA's :79 NA's :79 NA's :79
fun2_1 amb2_1 shar2_1 attr3_1
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 2.000
1st Qu.:15.00 1st Qu.: 6.00 1st Qu.:10.00 1st Qu.: 6.000
Median :20.00 Median :10.00 Median :10.00 Median : 7.000
Mean :18.42 Mean :11.74 Mean :11.85 Mean : 7.085
3rd Qu.:20.00 3rd Qu.:15.00 3rd Qu.:15.63 3rd Qu.: 8.000
Max. :50.00 Max. :50.00 Max. :30.00 Max. :10.000
NA's :79 NA's :89 NA's :89 NA's :105
sinc3_1 fun3_1 intel3_1 amb3_1
Min. : 2.000 Min. : 2.000 Min. : 3.000 Min. : 2.000
1st Qu.: 8.000 1st Qu.: 7.000 1st Qu.: 8.000 1st Qu.: 7.000
Median : 8.000 Median : 8.000 Median : 8.000 Median : 8.000
Mean : 8.295 Mean : 7.704 Mean : 8.404 Mean : 7.578
3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 9.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :105 NA's :105 NA's :105 NA's :105
attr5_1 sinc5_1 intel5_1 fun5_1
Min. : 2.000 Min. : 1.000 Min. : 3.000 Min. : 2.000
1st Qu.: 6.000 1st Qu.: 7.000 1st Qu.: 8.000 1st Qu.: 6.000
Median : 7.000 Median : 8.000 Median : 8.000 Median : 8.000
Mean : 6.942 Mean : 7.927 Mean : 8.284 Mean : 7.426
3rd Qu.: 8.000 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 9.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :3472 NA's :3472 NA's :3472 NA's :3472
amb5_1 dec attr sinc
Min. : 1.000 Min. :0.0000 Min. : 0.00 Min. : 0.000
1st Qu.: 7.000 1st Qu.:0.0000 1st Qu.: 5.00 1st Qu.: 6.000
Median : 8.000 Median :0.0000 Median : 6.00 Median : 7.000
Mean : 7.618 Mean :0.4199 Mean : 6.19 Mean : 7.175
3rd Qu.: 9.000 3rd Qu.:1.0000 3rd Qu.: 8.00 3rd Qu.: 8.000
Max. :10.000 Max. :1.0000 Max. :10.00 Max. :10.000
NA's :3472 NA's :202 NA's :277
intel fun amb shar
Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
1st Qu.: 6.000 1st Qu.: 5.000 1st Qu.: 6.000 1st Qu.: 4.000
Median : 7.000 Median : 7.000 Median : 7.000 Median : 6.000
Mean : 7.369 Mean : 6.401 Mean : 6.777 Mean : 5.475
3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.: 7.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :296 NA's :350 NA's :712 NA's :1067
like prob met match_es
Min. : 0.000 Min. : 0.000 Min. :0.0000 Min. : 0.000
1st Qu.: 5.000 1st Qu.: 4.000 1st Qu.:0.0000 1st Qu.: 2.000
Median : 6.000 Median : 5.000 Median :0.0000 Median : 3.000
Mean : 6.134 Mean : 5.208 Mean :0.9488 Mean : 3.208
3rd Qu.: 7.000 3rd Qu.: 7.000 3rd Qu.:2.0000 3rd Qu.: 4.000
Max. :10.000 Max. :10.000 Max. :8.0000 Max. :18.000
NA's :240 NA's :309 NA's :375 NA's :1173
attr1_s sinc1_s intel1_s fun1_s
Min. : 3.00 Min. : 0.00 Min. : 0.00 Min. : 1.00
1st Qu.:14.81 1st Qu.:10.00 1st Qu.:10.00 1st Qu.:10.00
Median :17.65 Median :15.79 Median :18.42 Median :15.91
Mean :20.79 Mean :15.43 Mean :17.24 Mean :15.26
3rd Qu.:25.00 3rd Qu.:20.00 3rd Qu.:20.00 3rd Qu.:20.00
Max. :95.00 Max. :50.00 Max. :40.00 Max. :40.00
NA's :4282 NA's :4282 NA's :4282 NA's :4282
amb1_s shar1_s attr3_s sinc3_s
Min. : 0.00 Min. : 0.00 Min. : 3.000 Min. : 1.000
1st Qu.: 7.00 1st Qu.: 9.00 1st Qu.: 7.000 1st Qu.: 7.000
Median :10.00 Median :12.50 Median : 7.000 Median : 8.000
Mean :11.14 Mean :12.46 Mean : 7.211 Mean : 8.082
3rd Qu.:15.00 3rd Qu.:16.28 3rd Qu.: 8.000 3rd Qu.: 9.000
Max. :23.81 Max. :30.00 Max. :10.000 Max. :10.000
NA's :4282 NA's :4282 NA's :4378 NA's :4378
intel3_s fun3_s amb3_s satis_2
Min. : 4.000 Min. : 3.000 Min. : 2.000 Min. : 1.000
1st Qu.: 8.000 1st Qu.: 7.000 1st Qu.: 7.000 1st Qu.: 5.000
Median : 8.000 Median : 8.000 Median : 8.000 Median : 6.000
Mean : 8.258 Mean : 7.692 Mean : 7.589 Mean : 5.712
3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 7.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :4378 NA's :4378 NA's :4378 NA's :915
length numdat_2 attr7_2 sinc7_2
Min. :1.000 Min. :1.000 Min. :10.00 Min. : 0.00
1st Qu.:1.000 1st Qu.:2.000 1st Qu.:20.00 1st Qu.:10.00
Median :1.000 Median :2.000 Median :30.00 Median :10.00
Mean :1.843 Mean :2.338 Mean :32.82 Mean :13.53
3rd Qu.:3.000 3rd Qu.:3.000 3rd Qu.:40.00 3rd Qu.:20.00
Max. :3.000 Max. :3.000 Max. :80.00 Max. :40.00
NA's :915 NA's :945 NA's :6394 NA's :6423
intel7_2 fun7_2 amb7_2 shar7_2
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00
1st Qu.:10.00 1st Qu.:10.00 1st Qu.: 0.000 1st Qu.: 5.00
Median :15.00 Median :20.00 Median : 5.000 Median :10.00
Mean :15.29 Mean :18.87 Mean : 7.287 Mean :12.16
3rd Qu.:20.00 3rd Qu.:24.00 3rd Qu.:10.000 3rd Qu.:20.00
Max. :50.00 Max. :50.00 Max. :20.000 Max. :40.00
NA's :6394 NA's :6394 NA's :6423 NA's :6404
attr1_2 sinc1_2 intel1_2 fun1_2
Min. : 5.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.:16.67 1st Qu.:10.00 1st Qu.:15.00 1st Qu.:15.00
Median :20.00 Median :16.67 Median :19.05 Median :18.37
Mean :26.22 Mean :15.87 Mean :17.81 Mean :17.65
3rd Qu.:30.00 3rd Qu.:20.00 3rd Qu.:20.00 3rd Qu.:20.00
Max. :85.00 Max. :50.00 Max. :40.00 Max. :50.00
NA's :933 NA's :915 NA's :915 NA's :915
amb1_2 shar1_2 attr4_2 sinc4_2
Min. : 0.000 Min. : 0.00 Min. : 6.00 Min. : 0.00
1st Qu.: 5.000 1st Qu.:10.00 1st Qu.: 10.00 1st Qu.: 8.00
Median :10.000 Median :13.00 Median : 25.00 Median :10.00
Mean : 9.913 Mean :12.76 Mean : 26.81 Mean :11.93
3rd Qu.:15.000 3rd Qu.:16.67 3rd Qu.: 40.00 3rd Qu.:15.00
Max. :22.220 Max. :35.00 Max. :100.00 Max. :35.00
NA's :915 NA's :915 NA's :2603 NA's :2603
intel4_2 fun4_2 amb4_2 shar4_2
Min. : 0.0 Min. : 0.00 Min. : 0.000 Min. : 0.00
1st Qu.: 8.0 1st Qu.: 9.00 1st Qu.: 5.000 1st Qu.: 7.00
Median :10.0 Median :15.00 Median :10.000 Median :10.00
Mean :12.1 Mean :15.16 Mean : 9.342 Mean :11.32
3rd Qu.:15.0 3rd Qu.:20.00 3rd Qu.:10.000 3rd Qu.:15.00
Max. :40.0 Max. :50.00 Max. :35.000 Max. :40.00
NA's :2603 NA's :2603 NA's :2603 NA's :2603
attr2_2 sinc2_2 intel2_2 fun2_2
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.:19.15 1st Qu.:10.00 1st Qu.:10.00 1st Qu.:15.00
Median :25.00 Median :15.00 Median :15.00 Median :18.52
Mean :29.34 Mean :13.90 Mean :13.96 Mean :17.97
3rd Qu.:38.46 3rd Qu.:19.23 3rd Qu.:17.39 3rd Qu.:20.00
Max. :85.00 Max. :40.00 Max. :30.77 Max. :40.00
NA's :2603 NA's :2603 NA's :2603 NA's :2603
amb2_2 shar2_2 attr3_2 sinc3_2
Min. : 0.00 Min. : 0.00 Min. : 2.000 Min. : 2.000
1st Qu.:10.00 1st Qu.:10.00 1st Qu.: 7.000 1st Qu.: 7.000
Median :10.00 Median :13.95 Median : 7.000 Median : 8.000
Mean :11.91 Mean :12.89 Mean : 7.125 Mean : 7.931
3rd Qu.:15.09 3rd Qu.:16.52 3rd Qu.: 8.000 3rd Qu.: 9.000
Max. :50.00 Max. :30.00 Max. :10.000 Max. :10.000
NA's :2603 NA's :2603 NA's :915 NA's :915
intel3_2 fun3_2 amb3_2 attr5_2
Min. : 4.000 Min. : 1.000 Min. : 2.000 Min. : 2.000
1st Qu.: 8.000 1st Qu.: 7.000 1st Qu.: 7.000 1st Qu.: 6.000
Median : 8.000 Median : 8.000 Median : 8.000 Median : 7.000
Mean : 8.239 Mean : 7.602 Mean : 7.487 Mean : 6.828
3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 8.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :915 NA's :915 NA's :915 NA's :4001
sinc5_2 intel5_2 fun5_2 amb5_2
Min. : 2.000 Min. : 2.000 Min. : 2.000 Min. : 2.000
1st Qu.: 6.000 1st Qu.: 7.000 1st Qu.: 6.000 1st Qu.: 6.000
Median : 8.000 Median : 8.000 Median : 7.000 Median : 7.000
Mean : 7.394 Mean : 7.839 Mean : 7.279 Mean : 7.332
3rd Qu.: 8.000 3rd Qu.: 9.000 3rd Qu.: 8.000 3rd Qu.: 8.000
Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
NA's :4001 NA's :4001 NA's :4001 NA's :4001
you_call them_cal date_3 numdat_3
Min. : 0.000 Min. :0.000 Min. :0.000 Min. :0.000
1st Qu.: 0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:1.000
Median : 0.000 Median :1.000 Median :0.000 Median :1.000
Mean : 0.781 Mean :0.982 Mean :0.377 Mean :1.231
3rd Qu.: 1.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000
Max. :21.000 Max. :9.000 Max. :1.000 Max. :9.000
NA's :4404 NA's :4404 NA's :4404 NA's :6882
num_in_3 attr1_3 sinc1_3 intel1_3
Min. :0.000 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.:1.000 1st Qu.:15.22 1st Qu.:10.00 1st Qu.:16.67
Median :1.000 Median :20.00 Median :16.67 Median :20.00
Mean :0.934 Mean :24.39 Mean :16.59 Mean :19.41
3rd Qu.:1.000 3rd Qu.:30.00 3rd Qu.:20.00 3rd Qu.:20.00
Max. :4.000 Max. :80.00 Max. :65.00 Max. :45.00
NA's :7710 NA's :4404 NA's :4404 NA's :4404
fun1_3 amb1_3 shar1_3 attr7_3 sinc7_3
Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.:14.81 1st Qu.: 5.0 1st Qu.:10.00 1st Qu.:20.00 1st Qu.:10.00
Median :16.33 Median :10.0 Median :14.29 Median :25.00 Median :15.00
Mean :16.23 Mean :10.9 Mean :12.70 Mean :31.33 Mean :15.65
3rd Qu.:20.00 3rd Qu.:15.0 3rd Qu.:16.67 3rd Qu.:40.00 3rd Qu.:20.00
Max. :30.00 Max. :30.0 Max. :55.00 Max. :80.00 Max. :60.00
NA's :4404 NA's :4404 NA's :4404 NA's :6362 NA's :6362
intel7_3 fun7_3 amb7_3 shar7_3
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00
1st Qu.:10.00 1st Qu.:10.00 1st Qu.: 0.000 1st Qu.: 5.00
Median :18.00 Median :17.00 Median :10.000 Median :10.00
Mean :16.68 Mean :16.42 Mean : 7.824 Mean :12.21
3rd Qu.:20.00 3rd Qu.:20.00 3rd Qu.:10.000 3rd Qu.:20.00
Max. :45.00 Max. :40.00 Max. :30.000 Max. :55.00
NA's :6362 NA's :6362 NA's :6362 NA's :6362
attr4_3 sinc4_3 intel4_3 fun4_3
Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
1st Qu.:10.00 1st Qu.: 7.00 1st Qu.: 7.00 1st Qu.: 9.00
Median :20.00 Median :10.00 Median :10.00 Median :12.00
Mean :25.61 Mean :10.75 Mean :11.53 Mean :14.28
3rd Qu.:37.00 3rd Qu.:15.00 3rd Qu.:15.00 3rd Qu.:20.00
Max. :80.00 Max. :40.00 Max. :30.00 Max. :30.00
NA's :5419 NA's :5419 NA's :5419 NA's :5419
amb4_3 shar4_3 attr2_3 sinc2_3
Min. : 0.000 Min. : 0.00 Min. : 5.00 Min. : 0.00
1st Qu.: 5.000 1st Qu.: 7.00 1st Qu.:10.00 1st Qu.: 7.00
Median : 9.000 Median :10.00 Median :20.00 Median :10.00
Mean : 9.208 Mean :11.25 Mean :24.97 Mean :10.92
3rd Qu.:10.000 3rd Qu.:15.00 3rd Qu.:35.00 3rd Qu.:15.00
Max. :40.000 Max. :45.00 Max. :80.00 Max. :50.00
NA's :5419 NA's :5419 NA's :5419 NA's :5419
intel2_3 fun2_3 amb2_3 shar2_3
Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00
1st Qu.: 7.00 1st Qu.: 9.00 1st Qu.: 6.000 1st Qu.: 5.00
Median :10.00 Median :15.00 Median :10.000 Median :10.00
Mean :11.95 Mean :14.96 Mean : 9.526 Mean :11.97
3rd Qu.:15.00 3rd Qu.:20.00 3rd Qu.:10.000 3rd Qu.:15.00
Max. :60.00 Max. :40.00 Max. :50.000 Max. :45.00
NA's :5419 NA's :5419 NA's :5419 NA's :6362
attr3_3 sinc3_3 intel3_3 fun3_3
Min. : 2.00 Min. : 2.000 Min. : 3.000 Min. : 2.000
1st Qu.: 7.00 1st Qu.: 7.000 1st Qu.: 8.000 1st Qu.: 7.000
Median : 7.00 Median : 8.000 Median : 8.000 Median : 8.000
Mean : 7.24 Mean : 8.093 Mean : 8.389 Mean : 7.659
3rd Qu.: 8.00 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.: 9.000
Max. :12.00 Max. :12.000 Max. :12.000 Max. :12.000
NA's :4404 NA's :4404 NA's :4404 NA's :4404
amb3_3 attr5_3 sinc5_3 intel5_3
Min. : 1.000 Min. : 2.00 Min. : 2.000 Min. : 4.000
1st Qu.: 6.000 1st Qu.: 6.00 1st Qu.: 7.000 1st Qu.: 7.000
Median : 8.000 Median : 7.00 Median : 8.000 Median : 8.000
Mean : 7.392 Mean : 6.81 Mean : 7.615 Mean : 7.933
3rd Qu.: 9.000 3rd Qu.: 8.00 3rd Qu.: 9.000 3rd Qu.: 9.000
Max. :12.000 Max. :10.00 Max. :10.000 Max. :10.000
NA's :4404 NA's :6362 NA's :6362 NA's :6362
fun5_3 amb5_3
Min. : 1.000 Min. : 1.000
1st Qu.: 6.000 1st Qu.: 6.000
Median : 7.000 Median : 7.000
Mean : 7.155 Mean : 7.049
3rd Qu.: 8.000 3rd Qu.: 8.000
Max. :10.000 Max. :10.000
NA's :6362 NA's :6362
Line Plot
# Build the data for the line plot: the mean of a "shared interests" column at
# each survey point, computed separately for non-matches (match == 0) and
# matches (match == 1). Missing values in the averaged column are ignored.
spddat_plot1 <- local({
  # Survey label -> column of spd_dat averaged for that label, in
  # chronological order.
  # NOTE(review): "Weeks After" averages shar1_2 -- confirm that this is the
  # intended column for that label.
  survey_cols <- c(
    "Before" = "shar1_1",
    "During" = "shar1_s",
    "Days After" = "shar7_2",
    "Weeks After" = "shar1_2"
  )
  match_levels <- c(0, 1)

  # One block of four means per match level, in the order of survey_cols.
  shar_means <- unlist(lapply(match_levels, function(m) {
    vapply(
      survey_cols,
      function(col) {
        mean(spd_dat[[col]][spd_dat[["match"]] == m], na.rm = TRUE)
      },
      numeric(1),
      USE.NAMES = FALSE
    )
  }))

  data.frame(
    # A factor with explicit levels keeps the x axis in chronological order;
    # a plain character column would be sorted alphabetically by ggplot2
    # ("Before", "Days After", "During", "Weeks After").
    time = factor(
      rep(names(survey_cols), times = length(match_levels)),
      levels = names(survey_cols)
    ),
    match = rep(match_levels, each = length(survey_cols)),
    shar_imp = shar_means
  )
})
spddat_plot1
time match shar_imp
1 Before 0 11.98161
2 During 0 12.54435
3 Days After 0 12.37751
4 Weeks After 0 12.82038
5 Before 1 11.15288
6 During 1 12.01364
7 Days After 1 11.04863
8 Weeks After 1 12.46087
Line chart over time with colors for different categories
# Line chart of mean shared-interest importance at each survey point, with one
# coloured line (and point markers) per match outcome.
ggplot(
  data = spddat_plot1,
  mapping = aes(
    x = time,
    y = shar_imp,
    group = match,
    colour = as.character(match)
  )
) +
  geom_line() +
  geom_point() +
  scale_colour_discrete(
    labels = c("No Match", "Match"),
    type = c("#7b3494", "#008837")
  ) +
  labs(
    title = "Importance of Shared Interests over Time Surveyed",
    x = "Time surveyed",
    y = "Importance of shared interests",
    colour = "Match"
  ) +
  theme_light() +
  theme(text = element_text(family = "Avenir")) +
  ylim(11, 13)
Smoothed Line Chart
Line chart of order vs. amount liked with match and gender
# Keep only the four columns used by the smoothed chart and drop every row
# with a missing value in any of them (the full-data summary shows 240 NAs in
# `like`), then print summary statistics of what is left.
spddat_plot2 <- na.omit(spd_dat[, c("order", "like", "match", "gender")])
summary(spddat_plot2)
order like match gender
Min. : 1.000 Min. : 0.000 Min. :0.0000 Min. :0.0000
1st Qu.: 4.000 1st Qu.: 5.000 1st Qu.:0.0000 1st Qu.:0.0000
Median : 8.000 Median : 6.000 Median :0.0000 Median :1.0000
Mean : 8.843 Mean : 6.134 Mean :0.1691 Mean :0.5025
3rd Qu.:13.000 3rd Qu.: 7.000 3rd Qu.:0.0000 3rd Qu.:1.0000
Max. :22.000 Max. :10.000 Max. :1.0000 Max. :1.0000
# Smoothed trend of how much a participant liked their partner against the
# order in which they met that night. One curve per match/gender combination:
# colour encodes match outcome, linetype encodes gender.
ggplot(
  spddat_plot2,
  aes(
    x = order,
    y = like,
    group = interaction(match, gender),
    colour = as.character(match)
  )
) +
  geom_smooth(aes(linetype = as.character(gender))) +
  scale_colour_discrete(
    labels = c("No Match", "Match"),
    type = c("#7b3494", "#008837")
  ) +
  scale_linetype(labels = c("Female", "Male")) +
  labs(
    title = "Amount liked match vs. order met",
    colour = "Match",
    linetype = "Gender"
  ) +
  xlab("Order met in night") +
  ylab("Amount liked match") +
  theme_light() +
  theme(text = element_text(family = "Avenir")) +
  # Zoom the y axis with coord_cartesian() rather than ylim(): ylim() sets
  # scale limits, which discards rows outside 1-10 (here the `like == 0`
  # ratings) before the smoother is fitted and raises a "Removed rows
  # containing non-finite values" warning. coord_cartesian() only changes the
  # visible window, so the curves are fitted on all rows.
  coord_cartesian(ylim = c(1, 10))
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 8 rows containing non-finite values (stat_smooth).
Bubble Plot
# Columns needed for the bubble plots.
spddat_plot3 <- spd_dat[
  , c("income", "gender", "mn_sat", "age", "field_cd", "like")
]

# income and mn_sat are read as text (see the summary of the full data);
# strip the "," characters and convert both to numeric.
spddat_plot3[c("income", "mn_sat")] <- lapply(
  spddat_plot3[c("income", "mn_sat")],
  function(txt) as.numeric(gsub(",", "", txt))
)

# Drop rows with a missing value in any selected column, then summarise.
spddat_plot3 <- na.omit(spddat_plot3)
summary(spddat_plot3)
income gender mn_sat age
Min. : 8607 Min. :0.0000 Min. : 914 Min. :21.00
1st Qu.: 35187 1st Qu.:0.0000 1st Qu.:1210 1st Qu.:24.00
Median : 46272 Median :0.0000 Median :1310 Median :26.00
Mean : 46838 Mean :0.3431 Mean :1297 Mean :26.32
3rd Qu.: 55080 3rd Qu.:1.0000 3rd Qu.:1400 3rd Qu.:28.00
Max. :106663 Max. :1.0000 Max. :1470 Max. :55.00
field_cd like
Min. : 1.000 Min. : 0.000
1st Qu.: 5.000 1st Qu.: 5.000
Median : 8.000 Median : 6.000
Mean : 7.879 Mean : 6.006
3rd Qu.:11.000 3rd Qu.: 7.000
Max. :18.000 Max. :10.000
# Collapse exact duplicate rows so each distinct combination is plotted once.
spddat_plot3 <- distinct(spddat_plot3)
# Bubble chart of income (log10 y axis) against median SAT score; bubble size
# encodes age and colour encodes gender.
ggplot(
  data = spddat_plot3,
  mapping = aes(
    x = mn_sat,
    y = income,
    group = gender,
    colour = as.character(gender)
  )
) +
  geom_point(mapping = aes(size = age), alpha = 0.7) +
  scale_y_continuous(trans = "log10") +
  scale_colour_discrete(
    labels = c("Female", "Male"),
    type = c("#7b3494", "#008837")
  ) +
  scale_size(range = c(2, 7), breaks = c(22, 30, 40, 55)) +
  labs(
    title = "Demographics by Home Area",
    x = "Median SAT Score",
    y = "Income of Home Area",
    colour = "Gender",
    size = "Age"
  ) +
  theme_light() +
  theme(text = element_text(family = "Avenir"))
# Bubble chart of income (log10 y axis) against the numeric field code;
# bubble size encodes the `like` rating and colour encodes gender.
ggplot(
  data = spddat_plot3,
  mapping = aes(
    x = field_cd,
    y = income,
    group = gender,
    colour = as.character(gender)
  )
) +
  geom_point(mapping = aes(size = like), alpha = 0.7) +
  scale_y_continuous(trans = "log10") +
  scale_colour_discrete(
    labels = c("Female", "Male"),
    type = c("#7b3494", "#008837")
  ) +
  scale_size(range = c(0.25, 6)) +
  labs(
    title = "",
    x = "Field of Major",
    y = "Income of Home Area",
    colour = "Gender",
    size = "Amount Liked"
  ) +
  theme_light() +
  theme(text = element_text(family = "Avenir"))
Interest Correlation Plots
# Columns for the interest-correlation plots, restricted to rows where all
# four are present; print summary statistics of the result.
sdp4 <- na.omit(spd_dat[, c("int_corr", "shar1_1", "match", "wave")])
summary(sdp4)
int_corr shar1_1 match wave
Min. :-0.8300 Min. : 0.00 Min. :0.0000 Min. : 1.0
1st Qu.:-0.0100 1st Qu.: 9.52 1st Qu.:0.0000 1st Qu.: 7.0
Median : 0.2100 Median :10.64 Median :0.0000 Median :11.0
Mean : 0.1974 Mean :11.85 Mean :0.1645 Mean :11.4
3rd Qu.: 0.4300 3rd Qu.:16.00 3rd Qu.:0.0000 3rd Qu.:15.0
Max. : 0.9100 Max. :30.00 Max. :1.0000 Max. :21.0
# Scatter plot of interest correlation against the stated importance of shared
# interests, coloured by match outcome, with a smoothed trend per outcome and
# a horizontal reference line at zero correlation.
ggplot(
  sdp4,
  aes(
    x = shar1_1,
    y = int_corr,
    colour = as.character(match),
    group = match
  )
) +
  geom_point(alpha = 0.6) +
  geom_smooth() +
  scale_colour_discrete(
    labels = c("No Match", "Match"),
    type = c("#7b3494", "#008837")
  ) +
  labs(
    title = "Interest Correlation and Importance of Shared Interest",
    colour = "Match"
  ) +
  xlab("Importance of Shared Interests") +
  ylab("Interest Correlation") +
  theme_light() +
  theme(text = element_text(family = "Avenir")) +
  # Pass yintercept as a parameter so the reference line is drawn once as an
  # annotation. Mapping it with aes() makes the layer use the plot data and
  # draw one identical, overlapping line per row.
  geom_hline(yintercept = 0)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Scatter plot of interest correlation against the stated importance of shared
# interests, with points coloured by the correlation value itself on a
# diverging palette. The colour legend is hidden.
ggplot(sdp4, aes(x = shar1_1, y = int_corr, colour = int_corr)) +
  geom_point() +
  scale_colour_distiller(palette = "RdYlGn") + # alternative palette: PRGn
  labs(title = "Interest Correlation and Importance of Shared Interest") +
  xlab("Importance of Shared Interests") +
  ylab("Interest Correlation") +
  theme_grey() +
  theme(
    text = element_text(family = "Avenir"),
    legend.position = "none"
  ) +
  # Pass yintercept as a parameter so the zero line is drawn once as an
  # annotation. Mapping it with aes() makes the layer use the plot data and
  # draw one identical, overlapping line per row.
  geom_hline(yintercept = 0, color = "#ffffbf")