SQL script to perform linear regression analysis on the COVIDcast data

This page documents the preview version (v2.21). Preview includes features under active development and is for development and testing only. For production, use the stable version (v2024.1). To learn more, see Versioning.

Save this script as "analysis-queries.sql"

create or replace view covidcast_fb_survey_results_v as
select
  survey_date,
  state,
  mask_wearing_pct,
  cmnty_symptoms_pct as symptoms_pct
from covidcast_fb_survey_results;

\o analysis-results/analysis-queries.txt
\t on
select 'Symptoms by state for survey date = 2020-10-21.';
\t off
select
  round(mask_wearing_pct)  as "% wearing mask",
  round(symptoms_pct)      as "% with symptoms",
  state
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd')
order by 1;

\t on
select 'Symptoms by state, overall average.';
\t off
select
  round(avg(mask_wearing_pct))  as "% wearing mask",
  round(avg(symptoms_pct))      as "% with symptoms",
  state
from covidcast_fb_survey_results_v
group by state
order by 1;

\t on
select 'Daily regression analysis report.';
\t off
with a as (
  select
                                                      survey_date,
    avg           (mask_wearing_pct)               as mask_wearing_pct,
    avg           (symptoms_pct)                   as symptoms_pct,
    regr_r2       (symptoms_pct, mask_wearing_pct) as r2,
    regr_slope    (symptoms_pct, mask_wearing_pct) as s,
    regr_intercept(symptoms_pct, mask_wearing_pct) as i
  from covidcast_fb_survey_results_v
  group by survey_date)
select
  to_char(survey_date,      'mm/dd')  as survey_date,
  to_char(mask_wearing_pct,    '90')  as mask_wearing_pct,
  to_char(symptoms_pct,  '90')        as symptoms_pct,
  to_char(r2,  '0.99')                as r2,
  to_char(s,  '90.9')                 as s,
  to_char(i,  '990.9')                as i
from a
order by survey_date;

\t on
select 'Regression analysis report for survey date = 2020-10-21.';
\t off
with a as (
  select
    max(survey_date)                               as survey_date,
    regr_slope    (symptoms_pct, mask_wearing_pct) as s,
    regr_intercept(symptoms_pct, mask_wearing_pct) as i
  from covidcast_fb_survey_results_v
  where survey_date = to_date('2020-10-21', 'yyyy-mm-dd'))
select
  to_char(survey_date,      'mm/dd')  as survey_date,
  to_char(s,  '90.9')                 as s,
  to_char(i,  '990.9')                as i
from a;

with a as (
  select regr_r2 (symptoms_pct, mask_wearing_pct) as r2
  from covidcast_fb_survey_results_v
  group by survey_date)
select
  to_char(avg(r2), '0.99') as "avg(R-squared)"
from a;

\o

\o analysis-results/2020-10-21-mask-symptoms.csv
\t on
select
  round(mask_wearing_pct)::text||','||round(symptoms_pct)::text
from covidcast_fb_survey_results_v
where survey_date = to_date('2020-10-21', 'yyyy-mm-dd')
order by 1;
\t off
\o