@altilunium
Last active October 14, 2025 22:32
import os
import json
import praw
import networkx as nx
import pandas as pd
from urllib.parse import urlparse
from dash import Dash, html, dcc, dash_table
from dash.dependencies import Input, Output, State
import plotly.graph_objects as go
from collections import Counter
import nltk
nltk.download('stopwords')
# Shared mapping of (user_a, user_b) pairs to exchange counts; filled in by
# analyze_reddit_pair_interactions() and read by build_graph_from_data() and the
# Dash callback below.
pair_scores = {}
# ================= CONFIGURATION ==================
# https://www.reddit.com/prefs/apps/
CLIENT_ID = "ENTER HERE"
CLIENT_SECRET = "ENTER HERE"
USER_AGENT = "ENTER HERE"
CACHE_DIR = "reddit_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
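# Each analyzed thread is cached as reddit_cache/<submission_id>.json so repeated
# runs reuse local data instead of re-hitting the Reddit API (see
# fetch_or_load_submission below).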
reddit = praw.Reddit(
client_id=CLIENT_ID,
client_secret=CLIENT_SECRET,
user_agent=USER_AGENT
)
# ================= UTILITIES ==================
def extract_submission_id(url):
path_parts = urlparse(url).path.strip("/").split("/")
try:
idx = path_parts.index("comments") + 1
return path_parts[idx]
except (ValueError, IndexError):
raise ValueError(f"Invalid Reddit URL: {url}")
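# Example: extract_submission_id("https://www.reddit.com/r/indonesia/comments/1o2maf5/10_october_2025_daily_chat_thread/")
# returns "1o2maf5".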
def fetch_or_load_submission(url):
"""Fetch thread comments from Reddit or load from cache"""
submission_id = extract_submission_id(url)
cache_path = os.path.join(CACHE_DIR, f"{submission_id}.json")
if os.path.exists(cache_path):
print(f"Loading cached thread: {submission_id}")
print(cache_path)
with open(cache_path, "r", encoding="utf-8") as f:
return json.load(f)
submission = reddit.submission(id=submission_id)
print(f"Downloading: {submission.title}")
submission.comments.replace_more(limit=None)
data = []
for comment in submission.comments.list():
if comment.author:
data.append({
"id": comment.id,
"author": str(comment.author),
"parent_id": comment.parent_id,
"body": comment.body
})
with open(cache_path, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
return data
def analyze_word_frequency(data, n=1000):
"""
Analyzes all comment bodies for word frequency.
Returns a DataFrame of the top 'n' words.
"""
import re
from nltk.corpus import stopwords
# NOTE: the NLTK 'stopwords' corpus is downloaded once at module import
# (see nltk.download('stopwords') at the top of this file).
# Aggregate all text
all_text = " ".join([c.get("body", "") for c in data if c.get("body")])
# Basic text cleaning: lowercase and remove non-alphanumeric characters
text = all_text.lower()
text = re.sub(r'[^a-z0-9\s]', '', text)
# Tokenize and remove stop words
words = text.split()
# Common English and Indonesian stop words (adjust as needed)
stop_words = set(stopwords.words('english'))
# Minimal Indonesian stop words as NLTK might not be configured for it
# You might want to use a more complete list (e.g., from sastrawi) for r/indonesia
indonesian_stopwords = {"yang", "dan", "di", "ke", "dari", "ini", "itu", "atau", "untuk", "dengan", "adalah", "tidak", "ya", "saja", "udah", "lagi", "pun"}
stop_words.update(indonesian_stopwords)
filtered_words = [word for word in words if word not in stop_words and len(word) > 1 and not word.isdigit()]
word_counts = Counter(filtered_words)
# Convert to DataFrame
df_words = pd.DataFrame(word_counts.most_common(n), columns=["Word", "Frequency"])
return df_words
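# Hypothetical usage sketch: analyze_word_frequency([{"body": "foo bar foo"}])
# returns a DataFrame with "foo" -> 2 and "bar" -> 1.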
def analyze_reddit_pair_interactions(data):
"""
Analyze comment threads to find which user pairs interacted the most deeply.
A pair (A, B) is counted when they exchange alternating replies (A→B→A→B...).
"""
# Build parent→children mapping
global pair_scores
children = {}
by_id = {}
for c in data:
cid = c["id"]
by_id[cid] = c
parent = c["parent_id"].split("_")[1] if c["parent_id"].startswith("t1_") else None
if parent:
children.setdefault(parent, []).append(c)
pair_depths = Counter()
def traverse(node_id, last_author=None, current_pair=None):
node = by_id.get(node_id)
if not node:
return
author = node["author"]
if current_pair and last_author and author != last_author:
normalized_pair = tuple(sorted(current_pair))
pair_depths[normalized_pair] += 1
next_pair = (author, last_author)
else:
next_pair = (author, last_author) if last_author else None
for child in children.get(node_id, []):
traverse(child["id"], last_author=author, current_pair=next_pair)
# Start traversal from top-level comments
for c in data:
if c["parent_id"].startswith("t3_"): # top-level
traverse(c["id"], None, None)
pair_scores = {tuple(sorted((a, b))): count for (a, b), count in pair_depths.items()}
# Convert to DataFrame
df_pairs = pd.DataFrame([
{"User A": a, "User B": b, "Interactions": count}
for (a, b), count in pair_depths.items()
])
if df_pairs.empty:
return pd.DataFrame(columns=["User A", "User B", "Interactions"])
return df_pairs.sort_values(by="Interactions", ascending=False).reset_index(drop=True)
def build_graph_from_data(data):
global pair_scores
G = nx.DiGraph()
id_to_author = {c["id"]: c["author"] for c in data}
for c in data:
if c["parent_id"].startswith("t1_"):
parent_id = c["parent_id"].split("_")[1]
if parent_id in id_to_author:
parent_author = id_to_author[parent_id]
child_author = c["author"]
if parent_author != child_author:
weight = pair_scores.get(tuple(sorted((child_author, parent_author))), 1)
G.add_edge(child_author, parent_author, weight=weight)
return G
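# Edge direction is replier -> user being replied to (child author -> parent author).
# Edge weights reuse the exchange scores from analyze_reddit_pair_interactions(),
# falling back to 1 for pairs without a recorded back-and-forth.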
def analyze_graph(G):
in_deg = dict(G.in_degree())
out_deg = dict(G.out_degree())
df = pd.DataFrame({
"user": list(G.nodes),
"popularity": [in_deg.get(u, 0) for u in G.nodes],
"interactivity": [out_deg.get(u, 0) for u in G.nodes],
})
df["total_degree"] = df["popularity"] + df["interactivity"]
df = df.sort_values("total_degree", ascending=False).reset_index(drop=True)
return df
# ===== Alternative pair analysis based directly on graph edges (currently unused) =====
def analyze_user_pairs(G):
# Convert directed edges into undirected pairs for counting mutual interactions
edges = [tuple(sorted((u, v))) for u, v in G.edges()]
pair_counts = Counter(edges)
df_pairs = pd.DataFrame(pair_counts.items(), columns=["User Pair", "Interactions"])
df_pairs[["User A", "User B"]] = pd.DataFrame(df_pairs["User Pair"].tolist(), index=df_pairs.index)
df_pairs = df_pairs[["User A", "User B", "Interactions"]]
df_pairs = df_pairs.sort_values(by="Interactions", ascending=False).reset_index(drop=True)
return df_pairs
def build_interactive_plot(G):
pos = nx.spring_layout(G, k=0.5, iterations=50, dim=2, seed=42, weight='weight')
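# spring_layout treats larger edge weights as stronger attraction, so pairs with
# more recorded exchanges are drawn closer together; seed=42 keeps the layout
# reproducible between runs.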
edge_x, edge_y = [], []
for edge in G.edges():
x0, y0 = pos[edge[0]]
x1, y1 = pos[edge[1]]
edge_x += [x0, x1, None]
edge_y += [y0, y1, None]
edge_trace = go.Scatter(
x=edge_x, y=edge_y,
line=dict(width=0.5, color='#888'),
hoverinfo='none',
mode='lines'
)
node_x, node_y, text = [], [], []
for node in G.nodes():
x, y = pos[node]
node_x.append(x)
node_y.append(y)
text.append(node)
node_trace = go.Scatter(
x=node_x, y=node_y,
mode='markers',
text=text,
textposition='top center',
hoverinfo='text',
marker=dict(
showscale=True,
colorscale='YlGnBu',
reversescale=True,
color=[len(list(G.neighbors(n))) for n in G.nodes()],
size=10,
colorbar=dict(
thickness=10,
title=dict(text='Connections', side='right'),
xanchor='left'
),
line_width=1
)
)
fig = go.Figure(data=[edge_trace, node_trace],
layout=go.Layout(
title="Reddit User Interaction Graph",
title_x=0.5,
showlegend=False,
hovermode='closest',
margin=dict(b=0, l=0, r=0, t=40),
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
))
return fig
# ================= MAIN APP ==================
def main(urls):
all_data = []
for url in urls:
data = fetch_or_load_submission(url)
all_data.extend(data)
unique_users = len(set(c["author"] for c in all_data))
df_word_freq = analyze_word_frequency(all_data, n=1000)
print(df_word_freq)
df_pairs = analyze_reddit_pair_interactions(all_data)
total_interactions = df_pairs['Interactions'].sum() if not df_pairs.empty else 0
G = build_graph_from_data(all_data)
df = analyze_graph(G)
#df_pairs = analyze_user_pairs(G) # new analysis
all_data_global = all_data
top_pairs = df_pairs.head(2)
fig = build_interactive_plot(G)
app = Dash(__name__)
app.layout = html.Div([
html.H1("Reddit User Interaction Analyzer", style={'textAlign': 'center'}),
html.H3(f"Total Unique Users Analyzed: {unique_users}", style={'textAlign': 'center', 'color': '#007BFF'}),
html.H3(f"Total Pair Interactions (A↔B Exchanges): {total_interactions}", style={'textAlign': 'center', 'color': '#28a745'}),
html.Div([
html.H3("Reddit User Interaction Graph", style={'textAlign': 'center'}),
html.Div([
# Left side — Graph
html.Div([
dcc.Graph(id="interaction-graph", figure=fig, style={
'height': '700px',
'width': '800px',
'border': '1px solid #ccc',
'borderRadius': '8px',
'padding': '5px'
}),
], style={'flex': '2', 'marginRight': '15px'}),
# Right side — Selected User Info
html.Div([
html.H3("Selected User Connections", style={'textAlign': 'center'}),
html.Div(id="selected-user-info", style={
'fontWeight': 'bold',
'marginBottom': '10px',
'textAlign': 'center'
}),
dash_table.DataTable(
id="connections-table",
columns=[
{"name": "Direction", "id": "direction"},
{"name": "Connected User", "id": "connected_user"},
{"name": "Interactions", "id": "interactions"}
],
page_size=300,
style_table={'overflowY': 'auto', 'height': '600px'},
style_cell={'textAlign': 'left', 'padding': '6px'},
style_header={'fontWeight': 'bold', 'backgroundColor': '#f0f0f0'},
)
], style={
'flex': '1',
'border': '1px solid #ddd',
'borderRadius': '8px',
'padding': '10px',
'backgroundColor': '#fafafa',
'boxShadow': '0 0 6px rgba(0,0,0,0.1)'
}),
], style={
'display': 'flex',
'flexDirection': 'row',
'justifyContent': 'space-between',
'alignItems': 'flex-start'
})
], style={'marginBottom': '40px'}),
html.H2("User Interaction Summary", style={'textAlign': 'center'}),
dash_table.DataTable(
id="main-table",
columns=[{"name": c, "id": c} for c in df.columns],
data=df.to_dict("records"),
page_size=100,
sort_action="native",
style_table={'overflowX': 'auto'},
style_cell={'textAlign': 'left', 'padding': '6px'}
),
html.H2("Most Interactive User Pairs (from real comment threads)",
style={'textAlign': 'center', 'marginTop': '40px'}),
html.Div(
f"Top two pairs: "
f"{df_pairs.iloc[0]['User A']} ↔ {df_pairs.iloc[0]['User B']} "
f"({df_pairs.iloc[0]['Interactions']} exchanges) and "
f"{df_pairs.iloc[1]['User A']} ↔ {df_pairs.iloc[1]['User B']} "
f"({df_pairs.iloc[1]['Interactions']} exchanges)."
if len(df_pairs) >= 2 else "No significant user pairs found.",
style={'textAlign': 'center', 'fontWeight': 'bold', 'marginBottom': '10px'}
),
dash_table.DataTable(
id="pair-thread-table",
columns=[{"name": c, "id": c} for c in df_pairs.columns],
data=df_pairs.to_dict("records"),
page_size=500,
sort_action="native",
style_table={'overflowX': 'auto'},
style_cell={'textAlign': 'left', 'padding': '6px'},
style_header={'fontWeight': 'bold', 'backgroundColor': '#f0f0f0'}
),
html.H2("Top 1000 Word Frequency List (Excluding Stop Words)",
style={'textAlign': 'center', 'marginTop': '40px'}),
dash_table.DataTable(
id="word-freq-table",
columns=[{"name": c, "id": c} for c in df_word_freq.columns],
data=df_word_freq.to_dict("records"),
page_size=1000,
style_table={'overflowX': 'auto', 'marginBottom': '40px'},
style_cell={'textAlign': 'left', 'padding': '6px'},
style_header={'fontWeight': 'bold', 'backgroundColor': '#f0f0f0'}
)
])
@app.callback(
[Output("selected-user-info", "children"),
Output("connections-table", "data")],
[Input("interaction-graph", "clickData")],
[State("interaction-graph", "figure")]
)
def display_node_connections(clickData, figure):
if not clickData or "points" not in clickData:
return "Click a node to view connections", []
clicked_user = clickData["points"][0]["text"]
if clicked_user not in G.nodes:
return f"{clicked_user} not found in graph.", []
# Count interactions
in_counts = {}
out_counts = {}
for u, v in G.edges():
if v == clicked_user: # incoming edge (user replied to clicked_user)
in_counts[u] = in_counts.get(u, 0) + 1
elif u == clicked_user: # outgoing edge (clicked_user replied to user)
out_counts[v] = out_counts.get(v, 0) + 1
data = []
for user, count in sorted(in_counts.items(), key=lambda x: x[1], reverse=True):
data.append({
"direction": "Incoming (replied by)",
"connected_user": user,
"interactions": pair_scores.get(tuple(sorted((user, clicked_user))), 0)
})
for user, count in sorted(out_counts.items(), key=lambda x: x[1], reverse=True):
data.append({
"direction": "Outgoing (replied to)",
"connected_user": user,
"interactions": pair_scores.get(tuple(sorted((user, clicked_user))), 0)
})
data = sorted(data, key=lambda x: x["interactions"], reverse=True)
return f"Connections for user: {clicked_user}", data
# Launch the Dash development server (recent Dash releases use app.run(debug=True)
# instead of the deprecated run_server)
app.run_server(debug=True)
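# Dash serves the app at http://127.0.0.1:8050 by default; open it in a browser and
# click a node in the graph to populate the connections table.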
if __name__ == "__main__":
urls = [
"https://www.reddit.com/r/indonesia/comments/1o2maf5/10_october_2025_daily_chat_thread/"
]
main(urls)