Skip to content

Commit 5eb0a83

Browse files
author
Daniel Dale
committed
new realized-performance scoring views
1 parent 73bcc16 commit 5eb0a83

File tree

1 file changed

+88
-10
lines changed

1 file changed

+88
-10
lines changed

db_setup/deep_classiflie_db_reporting_objects.sql

Lines changed: 88 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -735,7 +735,7 @@ round(avg(correct_incorrect) - avg(confidence),3) acc_conf_delta
735735
from confidence_buckets
736736
group by confidence_bucket
737737
order by confidence_bucket;
738-
create or replace view max_acc_nontweets as
738+
create or replace view max_ppv_nontweets as
739739
with test_data_filter as
740740
(select * from model_analysis_rpts where
741741
model_version in (select max(model_version) from model_analysis_rpts where report_type='model_rpt_gt')
@@ -783,7 +783,7 @@ target_bucket as
783783
(select confidence_bucket, acc, ppv, npv, ppr, npr from bucket_stats where ppv=(select max(ppv) from bucket_stats) order by confidence_bucket limit 1)
784784
select tb.acc as bucket_acc, concat(round((cb.confidence_bucket-1)*0.04*100,0), '-', round(cb.confidence_bucket*0.04*100,0),'%') as conf_percentile, tb.ppv as pos_pred_acc, tb.npv as neg_pred_acc, tb.ppr as pos_pred_ratio, tb.npr as neg_pred_ratio, cb.statement_id, cb.statement_text, tp, tn, fp, fn
785785
from confidence_buckets cb, target_bucket tb where cb.confidence_bucket=tb.confidence_bucket;
786-
create or replace view max_acc_tweets as
786+
create or replace view max_ppv_tweets as
787787
with test_data_filter as
788788
(select * from model_analysis_rpts where
789789
model_version in (select max(model_version) from model_analysis_rpts where report_type='model_rpt_gt')
@@ -831,8 +831,7 @@ target_bucket as
831831
(select confidence_bucket, acc, ppv, npv, ppr, npr from bucket_stats where ppv=(select max(ppv) from bucket_stats) order by confidence_bucket limit 1)
832832
select tb.acc as bucket_acc, concat(round((cb.confidence_bucket-1)*0.1*100,0), '-', round(cb.confidence_bucket*0.1*100,0),'%') as conf_percentile, tb.ppv as pos_pred_acc, tb.npv as neg_pred_acc, tb.ppr as pos_pred_ratio, tb.npr as neg_pred_ratio, cb.statement_id, cb.statement_text, tp, tn, fp, fn
833833
from confidence_buckets cb, target_bucket tb where cb.confidence_bucket=tb.confidence_bucket;
834-
835-
create or replace view min_acc_nontweets as
834+
create or replace view min_ppv_nontweets as
836835
with test_data_filter as
837836
(select * from model_analysis_rpts where
838837
model_version in (select max(model_version) from model_analysis_rpts where report_type='model_rpt_gt')
@@ -880,7 +879,7 @@ target_bucket as
880879
(select confidence_bucket, acc, ppv, npv, ppr, npr from bucket_stats where ppv=(select min(ppv) from bucket_stats) order by confidence_bucket limit 1)
881880
select tb.acc as bucket_acc, concat(round((cb.confidence_bucket-1)*0.04*100,0), '-', round(cb.confidence_bucket*0.04*100,0),'%') as conf_percentile, tb.ppv as pos_pred_acc, tb.npv as neg_pred_acc, tb.ppr as pos_pred_ratio, tb.npr as neg_pred_ratio, cb.statement_id, cb.statement_text, tp, tn, fp, fn
882881
from confidence_buckets cb, target_bucket tb where cb.confidence_bucket=tb.confidence_bucket;
883-
create or replace view min_acc_tweets as
882+
create or replace view min_ppv_tweets as
884883
with test_data_filter as
885884
(select * from model_analysis_rpts where
886885
model_version in (select max(model_version) from model_analysis_rpts where report_type='model_rpt_gt')
@@ -930,13 +929,13 @@ select tb.acc as bucket_acc, concat(round((cb.confidence_bucket-1)*0.1*100,0), '
930929
from confidence_buckets cb, target_bucket tb where cb.confidence_bucket=tb.confidence_bucket;
931930
create or replace view pred_explr_stmts as
932931
with man as
933-
(select 'max_acc_nontweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 0 as stype, tp, tn, fp, fn from max_acc_nontweets order by RAND(2718) limit 100),
932+
(select 'max_ppv_nontweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 0 as stype, tp, tn, fp, fn from max_ppv_nontweets order by RAND(2718) limit 100),
934933
mat as
935-
(select 'max_acc_tweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 1 as stype, tp, tn, fp, fn from max_acc_tweets order by RAND(2718) limit 100),
934+
(select 'max_ppv_tweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 1 as stype, tp, tn, fp, fn from max_ppv_tweets order by RAND(2718) limit 100),
936935
mian as
937-
(select 'min_acc_nontweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 0 as stype, tp, tn, fp, fn from min_acc_nontweets order by RAND(2718) limit 100),
936+
(select 'min_ppv_nontweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 0 as stype, tp, tn, fp, fn from min_ppv_nontweets order by RAND(2718) limit 100),
938937
miat as
939-
(select 'min_acc_tweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 1 as stype, tp, tn, fp, fn from min_acc_tweets order by RAND(2718) limit 100),
938+
(select 'min_ppv_tweets' as bucket_type, bucket_acc, conf_percentile, pos_pred_acc, neg_pred_acc, pos_pred_ratio, neg_pred_ratio, statement_id, statement_text, 1 as stype, tp, tn, fp, fn from min_ppv_tweets order by RAND(2718) limit 100),
940939
distinct_stmts as
941940
(
942941
select * from man
@@ -1115,4 +1114,83 @@ analyze_ids as
11151114
(select ut.thread_id from unlabeled_tweets ut left join published_tweets pt
11161115
on ut.thread_id=pt.thread_id where pt.thread_id is NULL)
11171116
select dt.thread_id, dt.end_thread_tweet_id, statement_text, 1 as ctxt_type, dt.t_end_date, CONCAT('https://twitter.com/a/status/',dt.end_thread_tweet_id) as url from dcbot_tweets dt, analyze_ids ai
1118-
where dt.thread_id=ai.thread_id and dt.wc between 7 and 107 and dt.retweet=0;
1117+
where dt.thread_id=ai.thread_id and dt.wc between 7 and 107 and dt.retweet=0;
1118+
-- latest_scored_false_statements
-- For every wp_statements row dated after the test_end_date of the
-- 'converged_filtered' dataset tied to the newest model_version, returns the
-- single closest (smallest l2dist, and below 0.04) candidate from
-- base_false_truth_del_cands whose truth_id resolves to an fbase_statements row.
-- truth_id is split on '***' into (tid, sid).
--
-- The previous definition used "min(l2dist) ... group by thread_id" while also
-- selecting tid / sid / truth_text. Those columns are not determined by
-- thread_id, so the engine was free to return them from any row of the group
-- rather than from the row holding the minimum distance. Ranking the matches
-- with ROW_NUMBER() keeps distance, ids and text from the same row.
-- The first output column keeps its historical name `min(l2dist)` so existing
-- consumers of the view are unaffected.
create or replace view latest_scored_false_statements as
with last_dsid as (
    select dsid
    from model_metadata
    where model_version = (select max(model_version) from model_metadata)
),
new_falsehoods as (
    select sid, s_date, statement_text
    from wp_statements
    where s_date > (
        select test_end_date
        from ds_metadata dsm
        inner join last_dsid ld on dsm.dsid = ld.dsid
        where ds_type = 'converged_filtered'
    )
    -- test version: where s_date > DATE_SUB((select test_end_date from ds_metadata dsm, last_dsid ld where ds_type='converged_filtered' and dsm.dsid = ld.dsid), INTERVAL 30 DAY)
),
matched_falsehood_truths as (
    select
        nf.sid as thread_id,
        nf.s_date as s_date,
        SUBSTRING_INDEX(bftdc.truth_id, '***', 1) as tid,
        CAST(SUBSTRING_INDEX(bftdc.truth_id, '***', -1) as INT) as sid,
        bftdc.l2dist as l2dist,
        nf.statement_text as falsehood_text,
        fs.statement_text as truth_text
    from new_falsehoods nf
    inner join base_false_truth_del_cands bftdc
        on nf.sid = bftdc.falsehood_id
    inner join fbase_statements fs
        on fs.tid = SUBSTRING_INDEX(bftdc.truth_id, '***', 1)
        and fs.sid = CAST(SUBSTRING_INDEX(bftdc.truth_id, '***', -1) as INT)
    where bftdc.l2dist < 0.04
),
ranked_matches as (
    select
        thread_id,
        s_date,
        tid,
        sid,
        l2dist,
        falsehood_text,
        truth_text,
        -- tid, sid break distance ties so the chosen row is deterministic
        row_number() over (partition by thread_id order by l2dist, tid, sid) as match_rank
    from matched_falsehood_truths
)
select
    l2dist as `min(l2dist)`,
    thread_id,
    s_date,
    tid,
    sid,
    falsehood_text,
    truth_text
from ranked_matches
where match_rank = 1;
1140+
-- latest_pub_stmt_updates
-- Rows of infsvc_stmts_published whose (tid, sid) also appear in
-- latest_scored_false_statements, limited to the highest model_version present
-- in model_metadata and to s_date values not later than the newest
-- wp_statements.s_date.
create or replace view latest_pub_stmt_updates as
select
    isp.tid,
    isp.sid,
    lsfs.s_date,
    isp.model_version
from latest_scored_false_statements lsfs
inner join infsvc_stmts_published isp
    on isp.tid = lsfs.tid
    and isp.sid = lsfs.sid
inner join model_metadata mm
    on mm.model_version = isp.model_version
where mm.model_version = (select max(model_version) from model_metadata)
    and lsfs.s_date <= (select max(s_date) from wp_statements);
1147+
-- latest_scored_false_tweets
-- For every wp_statements row dated after the test_end_date of the
-- 'converged_filtered' dataset tied to the newest model_version, returns the
-- single closest (smallest l2dist, and below 0.04) candidate from
-- base_false_truth_del_cands whose truth_id resolves to a dcbot_tweets row.
-- truth_id is split on '***' into (thread_id, end_thread_tweet_id).
--
-- The previous definition used "min(l2dist) ... group by f_thread_id" while
-- also selecting thread_id / truth_text. Those columns are not determined by
-- f_thread_id, so they could come from any row of the group rather than from
-- the row holding the minimum distance. Ranking the matches with ROW_NUMBER()
-- keeps distance, thread id and text from the same row.
-- The first output column keeps its historical name `min(l2dist)` so existing
-- consumers of the view are unaffected.
create or replace view latest_scored_false_tweets as
with last_dsid as (
    select dsid
    from model_metadata
    where model_version = (select max(model_version) from model_metadata)
),
new_falsehoods as (
    select sid, s_date, statement_text
    from wp_statements
    where s_date > (
        select test_end_date
        from ds_metadata dsm
        inner join last_dsid ld on dsm.dsid = ld.dsid
        where ds_type = 'converged_filtered'
    )
    -- test version: where s_date > DATE_SUB((select test_end_date from ds_metadata dsm, last_dsid ld where ds_type='converged_filtered' and dsm.dsid = ld.dsid), INTERVAL 30 DAY)
),
matched_falsehood_truths as (
    select
        nf.sid as f_thread_id,
        nf.s_date as s_date,
        CAST(SUBSTRING_INDEX(bftdc.truth_id, '***', 1) as INT) as thread_id,
        CAST(SUBSTRING_INDEX(bftdc.truth_id, '***', -1) as INT) as end_thread_tweet_id,
        bftdc.l2dist as l2dist,
        nf.statement_text as falsehood_text,
        dt.statement_text as truth_text
    from new_falsehoods nf
    inner join base_false_truth_del_cands bftdc
        on nf.sid = bftdc.falsehood_id
    inner join dcbot_tweets dt
        on dt.thread_id = CAST(SUBSTRING_INDEX(bftdc.truth_id, '***', 1) as INT)
        and dt.end_thread_tweet_id = CAST(SUBSTRING_INDEX(bftdc.truth_id, '***', -1) as INT)
    where bftdc.l2dist < 0.04
),
ranked_matches as (
    select
        f_thread_id,
        s_date,
        thread_id,
        l2dist,
        falsehood_text,
        truth_text,
        -- thread_id, end_thread_tweet_id break distance ties deterministically
        row_number() over (
            partition by f_thread_id
            order by l2dist, thread_id, end_thread_tweet_id
        ) as match_rank
    from matched_falsehood_truths
)
select
    l2dist as `min(l2dist)`,
    f_thread_id,
    s_date,
    thread_id,
    falsehood_text,
    truth_text
from ranked_matches
where match_rank = 1;
1169+
-- latest_pub_tweet_updates
-- Rows of infsvc_tweets_published whose thread_id also appears in
-- latest_scored_false_tweets, limited to the highest model_version present in
-- model_metadata and to s_date values not later than the newest
-- wp_statements.s_date.
create or replace view latest_pub_tweet_updates as
select
    itp.thread_id,
    lsft.s_date,
    itp.model_version
from latest_scored_false_tweets lsft
inner join infsvc_tweets_published itp
    on itp.thread_id = lsft.thread_id
inner join model_metadata mm
    on mm.model_version = itp.model_version
where mm.model_version = (select max(model_version) from model_metadata)
    and lsft.s_date <= (select max(s_date) from wp_statements);
1175+
-- latest_pub_stmt_nonlabel_updates
-- (tid, sid, t_date) for fbase_statements rows that are present in
-- infsvc_stmts_published and whose transcript date (fbase_transcripts.t_date)
-- falls after the test_end_date of the 'converged_filtered' dataset tied to the
-- newest model_version, and no later than the newest wp_statements.s_date.
create or replace view latest_pub_stmt_nonlabel_updates as
with last_dsid as (
    select dsid
    from model_metadata
    where model_version = (select max(model_version) from model_metadata)
),
all_cand_ids as (
    select
        fs.tid,
        fs.sid,
        ft.t_date
    from infsvc_stmts_published isp
    inner join fbase_statements fs
        on fs.tid = isp.tid
        and fs.sid = isp.sid
    inner join fbase_transcripts ft
        on fs.tid = ft.tid
    where ft.t_date > (
            select test_end_date
            from ds_metadata dsm
            inner join last_dsid ld on dsm.dsid = ld.dsid
            where ds_type = 'converged_filtered'
        )
        and ft.t_date <= (select max(s_date) from wp_statements)
)
select tid, sid, t_date
from all_cand_ids;
1187+
-- latest_pub_tweet_nonlabel_updates
-- (thread_id, t_end_date) for dcbot_tweets rows that are present in
-- infsvc_tweets_published and whose t_end_date falls after the test_end_date of
-- the 'converged_filtered' dataset tied to the newest model_version, and no
-- later than the newest wp_statements.s_date.
create or replace view latest_pub_tweet_nonlabel_updates as
with last_dsid as (
    select dsid
    from model_metadata
    where model_version = (select max(model_version) from model_metadata)
),
all_cand_ids as (
    select
        dt.thread_id,
        dt.t_end_date
    from infsvc_tweets_published itp
    inner join dcbot_tweets dt
        on dt.thread_id = itp.thread_id
    where dt.t_end_date > (
            select test_end_date
            from ds_metadata dsm
            inner join last_dsid ld on dsm.dsid = ld.dsid
            where ds_type = 'converged_filtered'
        )
        and dt.t_end_date <= (select max(s_date) from wp_statements)
)
select thread_id, t_end_date
from all_cand_ids;
1196+

0 commit comments

Comments
 (0)