TigerGraph-DevLabs
diff --git a/‎CHANGELOG.md
Lines changed: 6 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 6 additions & 0 deletions
diff --git a/‎examples/chatbot_langgraph/prompts/__init__.py
Lines changed: 12 additions & 1 deletion b/‎examples/chatbot_langgraph/prompts/__init__.py
Lines changed: 12 additions & 1 deletion
diff --git a/‎examples/chatbot_langgraph/prompts/algorithms.py
Lines changed: 168 additions & 0 deletions b/‎examples/chatbot_langgraph/prompts/algorithms.py
Lines changed: 168 additions & 0 deletions
diff --git a/‎examples/chatbot_langgraph/prompts/queries/__init__.py
Lines changed: 7 additions & 0 deletions b/‎examples/chatbot_langgraph/prompts/queries/__init__.py
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/chatbot_langgraph/prompts/queries/pagerank_query.py
Lines changed: 62 additions & 0 deletions b/‎examples/chatbot_langgraph/prompts/queries/pagerank_query.py
Lines changed: 62 additions & 0 deletions
diff --git a/‎examples/chatbot_langgraph/prompts/queries/wcc_query.py
Lines changed: 62 additions & 0 deletions b/‎examples/chatbot_langgraph/prompts/queries/wcc_query.py
Lines changed: 62 additions & 0 deletions
diff --git a/‎examples/chatbot_langgraph/prompts/task_flow.py
Lines changed: 42 additions & 0 deletions b/‎examples/chatbot_langgraph/prompts/task_flow.py
Lines changed: 42 additions & 0 deletions
@@ -1,6 +1,12 @@
 # Changelog
 
 ---
+### 0.1.5
+- feat: add example using LangGraph LLM and agents to call TigerGraph-MCP
+- feat: upgrade TigerGraphX to v0.2.9 with new TigerGraphDatabase class integration
+- feat: add run algorithm subgraph to onboarding workflow
+- feat: add MCP tools to update a data source, get/drop all data sources, run GSQL, and list metadata
+
 ### 0.1.4
 - feat: enable multi-turn human-in-the-loop schema confirmation for iterative refinement
 - feat: enable multi-turn human-in-the-loop loading job confirmation for iterative refinement
 
@@ -1,4 +1,5 @@
 from .planner import ONBOARDING_DETECTOR_PROMPT
+from .task_flow import PLAN_TOOL_EXECUTION_PROMPT
 from .onboarding import PREVIEW_SAMPLE_DATA_PROMPT
 from .schema import (
     CLASSIFY_COLUMNS_PROMPT,
@@ -14,12 +15,18 @@
     EDIT_LOADING_JOB_PROMPT,
     RUN_LOADING_JOB_PROMPT,
 )
+from .algorithms import (
+    SUGGEST_ALGORITHMS_PROMPT,
+    EDIT_ALGORITHM_SELECTION_PROMPT,
+    RUN_ALGORITHMS_PROMPT,
+)
 
 __all__ = [
     "ONBOARDING_DETECTOR_PROMPT",
+    "PLAN_TOOL_EXECUTION_PROMPT",
     "PREVIEW_SAMPLE_DATA_PROMPT",
-    "CLASSIFY_COLUMNS_PROMPT",
     # Schema
+    "CLASSIFY_COLUMNS_PROMPT",
     "DRAFT_SCHEMA_PROMPT",
     "EDIT_SCHEMA_PROMPT",
     "CREATE_SCHEMA_PROMPT",
@@ -30,4 +37,8 @@
     "LOAD_CONFIG_EDGE_MAPPING_PROMPT",
     "EDIT_LOADING_JOB_PROMPT",
     "RUN_LOADING_JOB_PROMPT",
+    # Algorithms
+    "SUGGEST_ALGORITHMS_PROMPT",
+    "EDIT_ALGORITHM_SELECTION_PROMPT",
+    "RUN_ALGORITHMS_PROMPT",
 ]
@@ -0,0 +1,168 @@
+from .queries import WCC_QUERY, PAGERANK_QUERY
+
+# Prompt for the algorithm-suggestion step: the model inspects the graph
+# schema and recommends WCC and/or PageRank.
+#   - WCC is suggested only when at least one undirected edge type exists.
+#   - PageRank is suggested only when a directed edge type has the same
+#     source and target vertex type.
+# The reply is free-form natural language that ends by asking the user to
+# confirm the suggestion or revise it.
+SUGGEST_ALGORITHMS_PROMPT = """
+## Objective
+
+Suggest suitable graph algorithms for the user to run, based on the current TigerGraph schema. Provide a brief explanation of each algorithm’s purpose and when it is applicable.
+
+## Instructions
+
+1. **Inspect the graph schema**:
+   - Identify all edge types along with their direction (directed or undirected) and source/target vertex types.
+
+2. **Suggest WCC (Weakly Connected Components)** if:
+   - The schema contains at least one undirected edge type.
+   - Do not suggest WCC if all edges are directed.
+
+3. **Suggest PageRank** if:
+   - There is at least one directed edge type where the source and target node types are the same.
+   - Do not suggest PageRank if such edges are not found.
+
+4. Do **not** mention any algorithm that is not applicable. Only include suggested algorithms.
+
+5. For each suggested algorithm, include:
+   - The algorithm name
+   - A short, user-friendly explanation of what it does
+   - The kind of insight or output the user might expect
+
+6. If no algorithms are applicable, reply with a short explanation that nothing is recommended based on current schema.
+
+7. End the message by asking the user to confirm if the suggested algorithms look good, or if they want to revise.
+
+## Output Format
+
+Respond in natural language. Examples:
+
+**If both algorithms are suggested:**
+```
+
+Based on your graph structure, I suggest the following algorithms:
+
+✅ **WCC (Weakly Connected Components)**
+Helps identify clusters of interconnected nodes based on undirected edges. Useful for finding isolated communities or disconnected parts of your graph.
+
+✅ **PageRank**
+Ranks nodes by importance using link structure. Commonly used to find influential or highly connected nodes in a network.
+
+Please confirm if this looks good by replying with "confirmed", "approved", "go ahead", or "ok". Or let me know if you'd like to revise anything.
+
+```
+
+**If only one algorithm is suggested (e.g., WCC):**
+```
+
+Based on your graph structure, I suggest the following algorithm:
+
+✅ **WCC (Weakly Connected Components)**
+Helps identify clusters of interconnected nodes based on undirected edges. Useful for finding isolated communities or disconnected parts of your graph.
+
+Please confirm if this looks good by replying with "confirmed", "approved", "go ahead", or "ok". Or let me know if you'd like to revise anything.
+
+```
+
+**If no algorithms are applicable:**
+```
+
+There are currently no suitable algorithms to run based on the structure of your graph.
+
+Please confirm if this looks good by replying with "confirmed", "approved", "go ahead", or "ok". Or let me know if you'd like to revise anything.
+
+```
+"""
+# Prompt for revising the WCC / PageRank selection from the user's feedback.
+# The model restates the updated selection and asks for final confirmation.
+# Ambiguous feedback or requests for other algorithms are answered with a
+# notice that only WCC and PageRank are currently supported.
+EDIT_ALGORITHM_SELECTION_PROMPT = """
+## Objective
+
+Revise the algorithm selections (WCC and PageRank) based on the user's feedback and confirm the updated choices.
+
+## Instructions
+
+- First, interpret the user's latest input to adjust the algorithm selection.
+  - If the feedback clearly includes or excludes WCC and/or PageRank, update the selection accordingly.
+  - If the feedback is ambiguous, incomplete, or includes unsupported algorithms, politely explain that only WCC and PageRank are currently supported.
+- Show the updated selection to the user.
+- Ask for final confirmation using accepted phrases: "confirmed", "approved", "go ahead", or "ok".
+
+## Output Format
+
+Summarize the final selection for confirmation:
+
+```
+
+Here’s your current selection:
+
+✅ WCC: Yes / No
+✅ PageRank: Yes / No
+
+Please confirm to proceed by replying with "confirmed", "approved", "go ahead", or "ok". Or let me know if you'd like to revise anything.
+
+```
+"""
+
+# Prompt for the execution step: for each selected algorithm the model is told
+# to create, install, and run the corresponding GSQL query through tool calls,
+# or to report that onboarding is complete when nothing was selected.
+# This is an f-string evaluated once at import time: WCC_QUERY and
+# PAGERANK_QUERY are interpolated verbatim, so the resulting text contains the
+# literal `{` / `}` characters of the GSQL sources.
+# NOTE(review): if this string is later passed through str.format() or a
+# brace-based prompt template, those braces would need escaping -- confirm
+# against the caller.
+RUN_ALGORITHMS_PROMPT = f"""
+## Objective
+
+Create, install, and run the selected graph algorithms (WCC and/or PageRank) on the current schema.
+
+## Instructions
+
+1. If no algorithms are selected, do not run any queries.  
+   Simply return:  
+   "No algorithms were selected. Onboarding is now complete."
+
+2. For each selected algorithm, perform the following steps using tool calls:
+   - Use `CREATE_QUERY` with the corresponding GSQL code below.
+   - Use `INSTALL_QUERY` to install the query.
+   - Use `RUN_QUERY` with the appropriate parameters.
+
+---
+
+### WCC (tg_wcc)
+
+- Only run if the user has confirmed.
+- Parameters:
+  - `e_type_set`: all undirected edge types
+  - `v_type_set`: all node types connected by undirected edges
+  - `print_limit`: -1
+  - All other parameters should use default values.
+- Expected output: number of connected components.
+
+**GSQL Code:**
+```
+
+{WCC_QUERY}
+
+```
+
+---
+
+### PageRank (tg_pagerank)
+
+- Only run if the user has confirmed.
+- Only run if there exists a valid (`v_type`, `e_type`) pair such that:
+  - `e_type` is a directed edge.
+  - Both the source and target types of `e_type` are the same and equal to `v_type`.
+- Parameters:
+  - `v_type`: node type connected by `e_type`
+  - `e_type`: directed edge type whose source and target types are both `v_type`
+  - All other parameters should use default values.
+- Expected output: comprehensive ranking details from `pagerank_top_nodes`.
+
+**GSQL Code:**
+```
+
+{PAGERANK_QUERY}
+
+```
+
+## Output Format
+
+```
+
+✅ WCC completed. Top connected components:
+
+✅ PageRank completed. Top-ranked nodes:
+
+✅ All selected algorithms have completed. Onboarding is now complete.
+
+```
+"""
@@ -0,0 +1,7 @@
+from .pagerank_query import PAGERANK_QUERY
+from .wcc_query import WCC_QUERY
+
+# Public names re-exported by the `queries` package.
+__all__ = ["PAGERANK_QUERY", "WCC_QUERY"]
@@ -0,0 +1,62 @@
+# GSQL source for the `tg_pagerank` query (CREATE OR REPLACE DISTRIBUTED QUERY,
+# SYNTAX V1).
+# Parameters: v_type / e_type select the vertices and edges to traverse;
+# max_change and maximum_iteration bound the iteration loop; damping is the
+# damping factor; top_k sizes the result heap; print_results, result_attribute
+# and file_path choose where the scores are emitted; display_edges additionally
+# collects the edges among the top-scored vertices.
+# The top scores are printed under the key `pagerank_top_nodes`.
+# NOTE(review): with display_edges = TRUE the query fills @@edge_set but only
+# prints `Top`; @@edge_set itself is never printed -- confirm this is intended.
+PAGERANK_QUERY = """
+CREATE OR REPLACE DISTRIBUTED QUERY tg_pagerank (STRING v_type, STRING e_type,
+  FLOAT max_change=0.001, INT maximum_iteration=25, FLOAT damping=0.85, INT top_k = 100,
+  BOOL print_results = TRUE, STRING result_attribute =  "", STRING file_path = "",
+  BOOL display_edges = FALSE) SYNTAX V1 {
+
+  TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
+  HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
+  SetAccum<VERTEX> @@top_vertices;      # vertices with top score
+  MaxAccum<FLOAT> @@max_diff = 9999;    # max score change in an iteration
+  SumAccum<FLOAT> @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors
+  SumAccum<FLOAT> @sum_score = 1;           # initial score for every vertex is 1.
+  SetAccum<EDGE> @@edge_set;             # list of all edges, if display is needed
+  FILE f (file_path);
+
+  # PageRank iterations	
+  Start = {v_type};                     # Start with all vertices of specified type(s)
+  WHILE @@max_diff > max_change 
+      LIMIT maximum_iteration DO
+          @@max_diff = 0;
+      V = SELECT s
+  	FROM Start:s -(e_type:e)- v_type:t
+  	ACCUM 
+              t.@sum_recvd_score += s.@sum_score/(s.outdegree(e_type)) 
+  	POST-ACCUM 
+              s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score,
+  	    s.@sum_recvd_score = 0,
+  	    @@max_diff += abs(s.@sum_score - s.@sum_score');
+  END; # END WHILE loop
+
+  # Output
+  IF file_path != "" THEN
+      f.println("Vertex_ID", "PageRank");
+  END;
+  V = SELECT s 
+      FROM Start:s
+      POST-ACCUM 
+          IF result_attribute != "" THEN 
+              s.setAttr(result_attribute, s.@sum_score) 
+          END,
+  	IF file_path != "" THEN 
+              f.println(s, s.@sum_score) 
+          END,
+  	IF print_results THEN 
+              @@top_scores_heap += Vertex_Score(s, s.@sum_score) 
+          END;
+  IF print_results THEN
+      PRINT @@top_scores_heap AS pagerank_top_nodes;
+      IF display_edges THEN
+          FOREACH vert IN @@top_scores_heap DO
+              @@top_vertices += vert.Vertex_ID;
+          END;
+          Top = {@@top_vertices};
+          Top = SELECT s
+  	        FROM Top:s -(e_type:e)- v_type:t
+              WHERE @@top_vertices.contains(t)
+  	        ACCUM @@edge_set += e;
+          PRINT Top;
+      END;
+  END;
+}
+"""
@@ -0,0 +1,62 @@
+# GSQL source for the `tg_wcc` query (connected components over the selected
+# vertex and edge types).
+# Each vertex starts with its internal ID as its component label; the smaller
+# label is then propagated across the selected edges until no label changes.
+# With print_results = TRUE the query prints `number_of_connected_components`,
+# counted as the number of distinct component IDs in @@comp_sizes_map (one map
+# entry per component). Counting the entries of a size-keyed grouping instead
+# would under-report whenever two components share the same size.
+# NOTE(review): print_limit only trims the local `Start` set, which is not
+# printed afterwards -- confirm whether a vertex listing was meant to be output.
+WCC_QUERY = """
+CREATE OR REPLACE DISTRIBUTED QUERY tg_wcc (SET<STRING> v_type_set, SET<STRING> e_type_set, INT print_limit = 100,
+  BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {
+  /*
+   This query identifies the Connected Components (undirected edges). When finished, each
+   vertex is assigned an INT label = its component ID number.
+    v_type_set: vertex types to traverse          print_results: print JSON output
+    e_type_set: edge types to traverse            result_attribute: INT attribute to store results to
+    file_path: file to write CSV output to
+    print_limit: max #vertices to output (-1 = all)  
+  */
+
+  MinAccum<INT> @min_cc_id = 0;       //each vertex's tentative component id
+  MapAccum<INT, INT> @@comp_sizes_map;
+  FILE f(file_path); 
+
+  Start = {v_type_set};
+
+  # Initialize: Label each vertex with its own internal ID
+  S = SELECT x 
+      FROM Start:x
+      POST-ACCUM x.@min_cc_id = getvid(x);
+
+  # Propagate smaller internal IDs until no more ID changes can be Done
+  WHILE (S.size()>0) DO
+      S = SELECT t
+          FROM S:s -(e_type_set:e)- v_type_set:t
+  	ACCUM t.@min_cc_id += s.@min_cc_id // If s has smaller id than t, copy the id to t
+  	HAVING t.@min_cc_id != t.@min_cc_id';
+  END;
+  IF file_path != "" THEN
+      f.println("Vertex_ID","Component_ID");
+  END;
+
+  Start = {v_type_set};
+  Start = SELECT s 
+          FROM Start:s
+  	POST-ACCUM 
+  	    IF result_attribute != "" THEN 
+  	        s.setAttr(result_attribute, s.@min_cc_id) 
+  	    END,
+  	    IF print_results THEN 
+  	        @@comp_sizes_map += (s.@min_cc_id -> 1) 
+  	    END,
+  	    IF file_path != "" THEN 
+  	        f.println(s, s.@min_cc_id) 
+  	    END;
+
+  IF print_results THEN
+      IF print_limit >= 0 THEN
+          Start = SELECT s 
+                  FROM Start:s 
+                  LIMIT print_limit;
+      END;
+      PRINT @@comp_sizes_map.size() AS number_of_connected_components;
+  END;
+}
+"""
@@ -0,0 +1,42 @@
+# System prompt for the tool-planning step: the model checks whether the user's
+# request is already satisfied by earlier tool responses. If it is, the model
+# summarizes every tool result in markdown; otherwise it issues the next tool
+# call(s) in order.
+# `trigger_graph_schema_creation` and `trigger_load_data` must each be called
+# on their own, never grouped with other tools in the same call.
+PLAN_TOOL_EXECUTION_PROMPT = """
+## Role
+You are a helpful assistant that uses TigerGraph-MCP tools and flows to fulfill user requests.
+
+## Objective
+Understand the user's request and determine whether any tools need to be executed to fulfill it. If all required tool calls have already been completed, present the results in detail. Otherwise, select and execute the next appropriate tool(s), following the correct order.
+
+## Instructions
+- First, **check if the user's instruction has already been satisfied** by reviewing the existing conversation and tool responses.
+  - If the request is complete, do **not** call any more tools. Instead, return a natural language response summarizing all tool results clearly and thoroughly.
+- If more steps are required, determine which tool(s) to call next.
+
+### Tool Calling Rules:
+- Call tools using `tool_calls`.
+- If the request involves creating a schema (`trigger_graph_schema_creation`) or loading data (`trigger_load_data`), you **must** call each of these tools **individually**, without grouping them with any other tools in the same call.
+- For example, if the user asks to:
+  1. Preview sample data
+  2. Create a schema
+  3. Load data  
+  Then call tools in this order:
+    - Preview sample data
+    - Then call `trigger_graph_schema_creation` **alone**
+    - Then call `trigger_load_data` **alone**
+
+## Output Format
+If you need to call a tool, use tool_calls.
+
+If all tools execution completed, return information to the user, present the results in friendly, readable text in markdown format. Show each tool’s outcome with complete details. Don’t omit any valuable information—for example, if the schema includes attribute names and types, be sure to include both.
+
+Here is an example:
+```
+
+1. The node 'john_doe' was successfully added to the graph.
+
+2. Number of nodes in the graph: 232,805 nodes.
+
+3. Number of edges in the graph: 197,845 edges.
+
+4. The neighbor query for 'john_doe' returned 3 connected nodes: 'jane_doe', 'acme_corp', and 'project_alpha'.
+
+```
+"""