Skip to content

Sixway Fact Labels #15

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions app/classification/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
Y_COLS_MULTICLASS = [
# multiclass classification with categorical classes:
"fourway_label", #"bom_overall_fourway_label", "bom_astroturf_fourway_label"
"sixway_fact_label" #, sixway_q_label
]
Y_COLS = Y_COLS_BINARY + Y_COLS_MULTICLASS
Y_COLS = ["sixway_fact_label"] #= Y_COLS_BINARY + Y_COLS_MULTICLASS

BOT_CLASSES_MAP = {True:"Bot", False:"Human"}
CLASSES_MAP = {
Expand All @@ -28,7 +29,7 @@
"is_bom_astroturf": BOT_CLASSES_MAP,
"opinion_community": {0:"Anti-Trump", 1:"Pro-Trump"},
"is_toxic": {0: "Normal", 1: "Toxic"},
"is_factual": {0: "Low Quality", 1: "High Quality"},
"is_factual": {0: "Low-Quality", 1: "High-Quality"},
}


Expand Down
9 changes: 8 additions & 1 deletion app/classification/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,11 +266,18 @@ def plot_roc_curve_multiclass(self, fig_show=FIG_SHOW, fig_save=FIG_SAVE, height

chart_data = []
for i, class_name in enumerate(class_names):

fpr, tpr, _ = roc_curve(y_test_encoded[:,i], self.y_pred_proba[:,i])
score = auc(fpr, tpr)

try:
color = ORANGES[i+2]
except IndexError:
color = ORANGES[-1] # just use the same color once we run out of oranges

trace = go.Scatter(x=fpr, y=tpr,
mode='lines',
line=dict(color=ORANGES[i+2], width=2),
line=dict(color=color, width=2),
name=f"'{str(class_name).title()}' vs Rest (AUC = {score.round(3)})"
)
chart_data.append(trace)
Expand Down
18 changes: 3 additions & 15 deletions app/colors.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
BOT_COLORS_MAP = {"Human": GREYS[3], "Bot": PURPLES[6]}
Q_COLORS_MAP = {"Normal": GREYS[3], "Q-anon": REDS[6]}
TOXIC_COLORS_MAP = {"Toxic": BROWNS[1], "Normal": GREYS[3]}
FACT_COLORS_MAP = {"High Quality": GREYS[3], "Low Quality": RD_PU[4]}
FACT_COLORS_MAP = {"High-Quality": GREYS[3], "Low-Quality": RD_PU[4]}

FOURWAY_COLORS_MAP = {
"Anti-Trump Human": BLUES[3],
Expand All @@ -29,17 +29,6 @@
"Pro-Trump Human": REDS[3],
"Pro-Trump Bot": REDS[6],
}
SIXWAY_COLORS_MAP = {
"Anti-Trump Human": BLUES[3],
"Anti-Trump Bot": BLUES[6],

"Pro-Trump Human": REDS[3],
"Pro-Trump Bot": REDS[6],

"Q-anon Human": REDS[4], # "Pro-Trump Q-anon Human"
"Q-anon Bot": REDS[7], # "Pro-Trump Q-anon Bot"
}


COLORS_MAP = {
"bot_label": BOT_COLORS_MAP,
Expand All @@ -49,7 +38,7 @@
"factual_label": FACT_COLORS_MAP,

"fourway_label": FOURWAY_COLORS_MAP,
"sixway_label": SIXWAY_COLORS_MAP,

"bom_overall_label": BOT_COLORS_MAP,
"bom_astroturf_label": BOT_COLORS_MAP,
}
Expand All @@ -64,8 +53,7 @@
"q_label": ["Normal", "Q-anon"],

"toxic_label": ["Normal", "Toxic"],
"factual_label": ["High Quality", "Low Quality"],
"factual_label": ["High-Quality", "Low-Quality"],

"fourway_label": list(FOURWAY_COLORS_MAP.keys()),
"sixway_label": list(SIXWAY_COLORS_MAP.keys()),
}
22 changes: 13 additions & 9 deletions app/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@

'opinion_label', 'bot_label', 'q_label',
"toxic_label", "factual_label",
'bom_overall_label', 'bom_astroturf_label', #'group_label'
'fourway_label', 'sixway_label', "bom_overall_fourway_label", "bom_astroturf_fourway_label"
'bom_overall_label', 'bom_astroturf_label',
'fourway_label', 'sixway_q_label', "sixway_fact_label",
"bom_overall_fourway_label", "bom_astroturf_fourway_label"
]


Expand All @@ -37,18 +38,13 @@ def __init__(self, csv_filepath=CSV_FILEPATH, label_cols=LABEL_COLS):
def df(self):
df = read_csv(self.csv_filepath)

df.rename(columns={"group_label": "sixway_label"}, inplace=True)
#print(df["sixway_label"].value_counts())
df.rename(columns={"group_label": "sixway_q_label"}, inplace=True)

df["fourway_label"] = df["opinion_label"] + " " + df["bot_label"]
#print(df["fourway_label"].value_counts())

df["is_bom_overall"] = df["bom_overall"].round()
df["is_bom_astroturf"] = df["bom_astroturf"].round()
df["bom_overall_label"] = df["is_bom_overall"].map({1:"Bot", 0:"Human"})
df["bom_astroturf_label"] = df["is_bom_astroturf"].map({1:"Bot", 0:"Human"})
df["bom_overall_fourway_label"] = df["opinion_label"] + " " + df["bom_overall_label"]
df["bom_astroturf_fourway_label"] = df["opinion_label"] + " " + df["bom_astroturf_label"]

toxic_threshold = 0.1 # set threshold and check robustness
df["is_toxic"] = df["avg_toxicity"] >= toxic_threshold
Expand All @@ -59,7 +55,15 @@ def df(self):
fact_threshold = 3.0 # set threshold and check robustness
df["is_factual"] = df["avg_fact_score"].apply(lambda score: score if isnull(score) else score >= fact_threshold)
df["is_factual"] = df["is_factual"].map({True: 1, False :0 })
df["factual_label"] = df["is_factual"].map({1: "High Quality", 0 :"Low Quality" })
df["factual_label"] = df["is_factual"].map({1: "High-Quality", 0 :"Low-Quality" })

# COMBINATIONS

df["fourway_label"] = df["opinion_label"] + " " + df["bot_label"]
df["bom_overall_fourway_label"] = df["opinion_label"] + " " + df["bom_overall_label"]
df["bom_astroturf_fourway_label"] = df["opinion_label"] + " " + df["bom_astroturf_label"]

df["sixway_fact_label"] = df["opinion_label"] + " " + df["bot_label"] + " " + df["factual_label"]

return df

Expand Down
2 changes: 1 addition & 1 deletion app/reduction/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def plot_centroids(self, groupby_col, height=500, fig_show=FIG_SHOW, fig_save=FI
"bot_label", "opinion_label", "bom_overall_label", "bom_astroturf_label",
"toxic_label", "factual_label",

"fourway_label", #"sixway_label",
"fourway_label", #"sixway_fact_label",
]:
color_map = COLORS_MAP[groupby_col]
category_orders = {groupby_col: CATEGORY_ORDERS[groupby_col]}
Expand Down
2 changes: 1 addition & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

N_USERS = 7566
N_FEATURES = 1536 # number of embeddings returned by openai
N_LABELS = 36 # number of label columns
N_LABELS = 37 # number of label columns

@fixture(scope="module")
def ds():
Expand Down
3 changes: 1 addition & 2 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ <h3><a href="results/reduced_classification/index.html">Classification Results (
"opinion_label": "Opinion Community",
"q_label": "Qanon Status",
"fourway_label": "Four Group Label",
"sixway_label": "Six Group Label"
}
//var table = document.getElementById("results-table")
//var tableBody = table.tBodies[0]
Expand Down Expand Up @@ -102,7 +101,7 @@ <h3><a href="results/reduced_classification/index.html">Classification Results (
var GROUPS = ["bot_label",
"bom_overall_label", "bom_astroturf_label",
"opinion_label",
"fourway_label", //"sixway_label"
"fourway_label",
]

REDUCTION_METHODS.forEach(function(reduction_method){
Expand Down

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
{
"class_names": [
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7"
],
"class_labels": [
"Anti-Trump Bot High-Quality",
"Anti-Trump Bot Low-Quality",
"Anti-Trump Human High-Quality",
"Anti-Trump Human Low-Quality",
"Pro-Trump Bot High-Quality",
"Pro-Trump Bot Low-Quality",
"Pro-Trump Human High-Quality",
"Pro-Trump Human Low-Quality"
],
"classification_report": {
"Anti-Trump Bot High-Quality": {
"precision": 0.6683804627249358,
"recall": 0.9961685823754789,
"f1-score": 0.8,
"support": 261.0
},
"Anti-Trump Bot Low-Quality": {
"precision": 0.0,
"recall": 0.0,
"f1-score": 0.0,
"support": 96.0
},
"Anti-Trump Human High-Quality": {
"precision": 0.8333333333333334,
"recall": 0.1724137931034483,
"f1-score": 0.28571428571428575,
"support": 29.0
},
"Anti-Trump Human Low-Quality": {
"precision": 0.0,
"recall": 0.0,
"f1-score": 0.0,
"support": 9.0
},
"Pro-Trump Bot High-Quality": {
"precision": 0.0,
"recall": 0.0,
"f1-score": 0.0,
"support": 10.0
},
"Pro-Trump Bot Low-Quality": {
"precision": 0.8939393939393939,
"recall": 0.9915966386554622,
"f1-score": 0.9402390438247011,
"support": 238.0
},
"Pro-Trump Human High-Quality": {
"precision": 0.0,
"recall": 0.0,
"f1-score": 0.0,
"support": 2.0
},
"Pro-Trump Human Low-Quality": {
"precision": 0.0,
"recall": 0.0,
"f1-score": 0.0,
"support": 14.0
},
"accuracy": 0.7602427921092565,
"macro avg": {
"precision": 0.2994566487497079,
"recall": 0.2700223767667987,
"f1-score": 0.2532441661923734,
"support": 659.0
},
"weighted avg": {
"precision": 0.6242360291281497,
"recall": 0.7602427921092565,
"f1-score": 0.6689872636054525,
"support": 659.0
}
},
"confusion_matrix": [
[
260,
0,
0,
0,
0,
1,
0,
0
],
[
94,
0,
0,
0,
0,
2,
0,
0
],
[
24,
0,
5,
0,
0,
0,
0,
0
],
[
8,
0,
1,
0,
0,
0,
0,
0
],
[
1,
0,
0,
0,
0,
9,
0,
0
],
[
2,
0,
0,
0,
0,
236,
0,
0
],
[
0,
0,
0,
0,
0,
2,
0,
0
],
[
0,
0,
0,
0,
0,
14,
0,
0
]
],
"roc_auc_score": 0.8332511940989005,
"y_col": "sixway_fact_label",
"x_scaled": false,
"grid_search": {
"model_type": "LogisticRegression",
"k_folds": 5,
"param_grid": {
"classifier__max_iter": [
25,
1000,
10000
]
},
"best_params": {
"classifier__max_iter": 1000
},
"best_score": 0.8309722151668659
}
}

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 0 additions & 14 deletions results/reduction/sixway_label/pca_2.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/pca_2_centroids.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/pca_3.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/pca_3_centroids.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/tsne_2.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/tsne_3.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/umap_2.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/umap_2_centroids.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/umap_3.html

This file was deleted.

14 changes: 0 additions & 14 deletions results/reduction/sixway_label/umap_3_centroids.html

This file was deleted.