Skip to content

Commit 40c8727

Browse files
chore: Added infra & scripts for workshop
2 parents 37e8233 + efa449d commit 40c8727

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+8956
-748
lines changed

data/default/_generated_script.py

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
import os
2+
import json
3+
import random
4+
from datetime import datetime, timedelta
5+
import pandas as pd
6+
from fpdf import FPDF
7+
8+
# Row counts for the two generated tables.
NUM_OUTAGES = 16
NUM_TICKETS = 40

# Root folder for this scenario's artifacts, with one subfolder per artifact kind.
output_dir = "C:/Work/15_fabric_ontology/data/20260203_112459_telecommunications"
config_dir, tables_dir, documents_dir = (
    os.path.join(output_dir, subfolder)
    for subfolder in ("config", "tables", "documents")
)

# Make sure every target folder exists before anything is written into it.
for folder in (config_dir, tables_dir, documents_dir):
    os.makedirs(folder, exist_ok=True)
22+
23+
# Both tables place their dates relative to this fixed starting day.
base_date = datetime(2024, 1, 1)

# Primary table: network outages.
# Each column is fully drawn before the next one starts, so the order of the
# assignments below fixes the order in which random numbers are consumed.
outage_ids = [f'OUT{str(i).zfill(3)}' for i in range(1, NUM_OUTAGES + 1)]
outage_starts = [
    (base_date + timedelta(days=random.randint(0, 30))).strftime('%Y-%m-%d %H:%M:%S')
    for _ in range(NUM_OUTAGES)
]
outage_durations = [random.randint(30, 480) for _ in range(NUM_OUTAGES)]
outage_impacts = random.choices(['Low', 'Medium', 'High'], weights=[50, 30, 20], k=NUM_OUTAGES)

outages = pd.DataFrame({
    'outage_id': outage_ids,
    'outage_start': outage_starts,
    'duration_minutes': outage_durations,
    'impact_level': outage_impacts,
})
outages.to_csv(os.path.join(tables_dir, "network_outages.csv"), index=False)

# Secondary table: trouble tickets, each pointing at a randomly chosen outage id.
ticket_ids = [f'TIC{str(i).zfill(3)}' for i in range(1, NUM_TICKETS + 1)]
ticket_dates = [
    (base_date + timedelta(days=random.randint(0, 60))).strftime('%Y-%m-%d')
    for _ in range(NUM_TICKETS)
]
ticket_resolution_hours = [random.randint(1, 72) for _ in range(NUM_TICKETS)]  # In hours
ticket_outage_ids = [
    f'OUT{str(random.randint(1, NUM_OUTAGES)).zfill(3)}'
    for _ in range(NUM_TICKETS)
]
ticket_impacts = random.choices(['None', 'Minor', 'Major'], weights=[40, 50, 10], k=NUM_TICKETS)

tickets = pd.DataFrame({
    'ticket_id': ticket_ids,
    'ticket_created': ticket_dates,
    'resolution_time': ticket_resolution_hours,
    'outage_id': ticket_outage_ids,
    'customer_impact': ticket_impacts,
})
tickets.to_csv(os.path.join(tables_dir, "trouble_tickets.csv"), index=False)
43+
44+
# Column -> type mappings for each table. Insertion order is the column order,
# so the "columns" lists below are derived directly from these mappings.
outage_column_types = {
    "outage_id": "String",
    "outage_start": "DateTime",
    "duration_minutes": "BigInt",
    "impact_level": "String",
}
ticket_column_types = {
    "ticket_id": "String",
    "ticket_created": "Date",
    "resolution_time": "BigInt",
    "outage_id": "String",
    "customer_impact": "String",
}

# Ontology description: the two tables plus the ticket -> outage relationship.
config = {
    "scenario": "telecommunications",
    "name": "Network Management",
    "description": "Tracking outages and managing customer impacts",
    "tables": {
        "network_outages": {
            "columns": list(outage_column_types),
            "types": outage_column_types,
            "key": "outage_id",
            "source_table": "network_outages",
        },
        "trouble_tickets": {
            "columns": list(ticket_column_types),
            "types": ticket_column_types,
            "key": "ticket_id",
            "source_table": "trouble_tickets",
        },
    },
    "relationships": [
        {
            "name": "ticket_outage",
            "from": "trouble_tickets",
            "to": "network_outages",
            "fromKey": "outage_id",
            "toKey": "outage_id",
        },
    ],
}

# Serialise with a 4-space indent into config/ontology_config.json.
config_path = os.path.join(config_dir, "ontology_config.json")
with open(config_path, "w") as config_file:
    config_file.write(json.dumps(config, indent=4))
70+
71+
# Sample questions, one entry per output line; empty entries are the blank
# lines that separate the three groups.
question_lines = (
    "=== SQL QUESTIONS (Fabric Data) ===",
    "- How many outages occurred last month?",
    "- What is the average duration of outages?",
    "- Which outage caused the most customer impact?",
    "- How many trouble tickets were created for each outage?",
    "- What is the average resolution time for tickets?",
    "",
    "=== DOCUMENT QUESTIONS (AI Search) ===",
    "- What are the policies for notifying customers of outages?",
    "- How is customer impact classified in our documentation?",
    "- What is the response time required for outages?",
    "- What steps must be taken to escalate an outage?",
    "- How often should outage reports be generated?",
    "",
    "=== COMBINED INSIGHT QUESTIONS ===",
    "- Which outages exceeded the maximum duration defined in our policy?",
    "- What percentage of tickets were resolved in less time than our SLA?",
    "- How many outages were rated as 'High' impact based on our threshold?",
    "- Which tickets experienced delays longer than our expected resolution times?",
    "- What was the average customer impact during the last 30 days compared to policy standards?",
)
# The file ends with a single trailing newline.
questions = "\n".join(question_lines) + "\n"

questions_path = os.path.join(config_dir, "sample_questions.txt")
with open(questions_path, "w") as questions_file:
    questions_file.write(questions)
96+
97+
# Function to create PDFs
98+
def create_pdf(title, sections, filename):
    """Render a titled, sectioned document to a PDF inside ``documents_dir``.

    Args:
        title: Document title, printed centered in bold 16pt at the top.
        sections: Iterable of ``(heading, content)`` string pairs. Each heading
            is printed in bold 12pt, followed by its content as wrapped 11pt text.
        filename: Name of the PDF file to create; it is joined onto the
            module-level ``documents_dir``.

    All rendered text (title, headings and content) is reduced to ASCII first,
    with any other character replaced by ``?``.
    """
    def _ascii_only(text):
        # The document uses the built-in Helvetica font; replacing non-ASCII
        # characters up front keeps unsupported characters from failing the
        # render (see the fpdf2 Unicode documentation).
        return text.encode('ascii', 'replace').decode('ascii')

    pdf = FPDF()
    pdf.add_page()

    # Title line, then a gap before the first section.
    pdf.set_font("Helvetica", "B", 16)
    pdf.cell(0, 10, _ascii_only(title), new_x="LMARGIN", new_y="NEXT", align="C")
    pdf.ln(10)

    for heading, content in sections:
        pdf.set_font("Helvetica", "B", 12)
        pdf.cell(0, 8, _ascii_only(heading), new_x="LMARGIN", new_y="NEXT")
        pdf.set_font("Helvetica", "", 11)
        pdf.multi_cell(0, 6, _ascii_only(content))
        pdf.ln(5)  # spacing between sections

    pdf.output(os.path.join(documents_dir, filename))
112+
113+
# Policy text for each PDF, as (heading, body) pairs.
sections1 = [
    (
        "1. Outage Notification Policy",
        "In the event of a significant network outage, it is essential to notify impacted customers within 30 minutes. "
        "Notifications should be sent via SMS and email for maximum reach. Levels of notification depend on impact: "
        "'Major' outages will require direct communications, while 'Minor' ones can be handled through website updates.",
    ),
    (
        "2. Customer Impact Classification",
        "Customer impact is classified into three levels: None, Minor, and Major. "
        "'Minor' reflects limited service disruptions affecting a small number of users, whereas 'Major' indicates a significant disruption affecting a large customer base. "
        "This classification guides response strategies and customer communications.",
    ),
    (
        "3. Outage Reporting Frequency",
        "Outage reports must be generated and reviewed on a weekly basis. Reports should include total outages, average duration, and customer impact ratings. "
        "This data is crucial for assessing the overall health of the network and guiding improvement efforts.",
    ),
]

sections2 = [
    (
        "1. Ticket Escalation Process",
        "If a trouble ticket is unresolved after 24 hours, it should be escalated to a supervisor. Supervisors have an additional 24 hours to resolve issues. "
        "For tickets that remain unresolved after this period, further escalation to the management team is mandatory.",
    ),
    (
        "2. Resolution Time Standards",
        "All tickets should ideally be resolved within 72 hours. A timely resolution is a critical aspect of customer satisfaction. "
        "Any ticket exceeding this timeline must be flagged for management review and action.",
    ),
    (
        "3. Customer Feedback Mechanism",
        "It is imperative to gather customer feedback on ticket resolution. Follow-up surveys should be sent within one week of ticket closure, with a target response rate of 60%. "
        "Feedback will be reviewed bi-weekly to identify areas for service improvement.",
    ),
]

sections3 = [
    (
        "1. Compliance and Service Level Agreements",
        "Service Level Agreements (SLAs) define minimum service quality levels. For outages, resolutions must not exceed a maximum duration of four hours for 'Major' impacts. "
        "Regular audits should be conducted to ensure compliance with these SLAs.",
    ),
    (
        "2. Reporting and Documentation",
        "All outages and tickets must be documented with specified details including resolution times, customer impacts, and escalations. "
        "Documentation is crucial for ensuring accountability and transparency in our operations.",
    ),
    (
        "3. Response Time Expectations",
        "Customer service representatives must respond to outage inquiries within one hour during business hours. Outside of business hours, responses should occur within three hours. "
        "This commitment to prompt responses helps maintain customer trust.",
    ),
]

# Render one PDF per policy document: (title, sections, output file name).
policy_documents = (
    ("Outage Management Policies", sections1, "outage_management_policies.pdf"),
    ("Trouble Ticket Management Policies", sections2, "ticket_management_policies.pdf"),
    ("Policies for Customer Service and Accountability", sections3, "customer_service_policies.pdf"),
)
for doc_title, doc_sections, doc_filename in policy_documents:
    create_pdf(doc_title, doc_sections, doc_filename)

print("Data and documents generated successfully.")

data/default/config/agent_ids.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"_comment": "This file is populated when you run 07_create_foundry_agent.py",
3+
"agent_id": null,
4+
"agent_name": null,
5+
"search_index": null
6+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"_comment": "This file is populated when you run 02_create_fabric_items.py",
3+
"lakehouse_id": null,
4+
"lakehouse_name": null,
5+
"ontology_id": null,
6+
"ontology_name": null,
7+
"solution_name": null,
8+
"created_at": null
9+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"scenario": "telecommunications",
3+
"name": "Network Management",
4+
"description": "Tracking outages and managing customer impacts",
5+
"tables": {
6+
"network_outages": {
7+
"columns": [
8+
"outage_id",
9+
"outage_start",
10+
"duration_minutes",
11+
"impact_level"
12+
],
13+
"types": {
14+
"outage_id": "String",
15+
"outage_start": "DateTime",
16+
"duration_minutes": "BigInt",
17+
"impact_level": "String"
18+
},
19+
"key": "outage_id",
20+
"source_table": "network_outages"
21+
},
22+
"trouble_tickets": {
23+
"columns": [
24+
"ticket_id",
25+
"ticket_created",
26+
"resolution_time",
27+
"outage_id",
28+
"customer_impact"
29+
],
30+
"types": {
31+
"ticket_id": "String",
32+
"ticket_created": "Date",
33+
"resolution_time": "BigInt",
34+
"outage_id": "String",
35+
"customer_impact": "String"
36+
},
37+
"key": "ticket_id",
38+
"source_table": "trouble_tickets"
39+
}
40+
},
41+
"relationships": [
42+
{
43+
"name": "ticket_outage",
44+
"from": "trouble_tickets",
45+
"to": "network_outages",
46+
"fromKey": "outage_id",
47+
"toKey": "outage_id"
48+
}
49+
]
50+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
=== SQL QUESTIONS (Fabric Data) ===
2+
- How many outages occurred last month?
3+
- What is the average duration of outages?
4+
- Which outage caused the most customer impact?
5+
- How many trouble tickets were created for each outage?
6+
- What is the average resolution time for tickets?
7+
8+
=== DOCUMENT QUESTIONS (AI Search) ===
9+
- What are the policies for notifying customers of outages?
10+
- How is customer impact classified in our documentation?
11+
- What is the response time required for outages?
12+
- What steps must be taken to escalate an outage?
13+
- How often should outage reports be generated?
14+
15+
=== COMBINED INSIGHT QUESTIONS ===
16+
- Which outages exceeded the maximum duration defined in our policy?
17+
- What percentage of tickets were resolved in less time than our SLA?
18+
- How many outages were rated as 'High' impact based on our threshold?
19+
- Which tickets experienced delays longer than our expected resolution times?
20+
- What was the average customer impact during the last 30 days compared to policy standards?

data/default/config/schema.json

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
{
2+
"name": "Network Management",
3+
"description": "Tracking outages and managing customer impacts",
4+
"tables": {
5+
"network_outages": {
6+
"columns": [
7+
{
8+
"name": "outage_id",
9+
"type": "String"
10+
},
11+
{
12+
"name": "outage_start",
13+
"type": "DateTime"
14+
},
15+
{
16+
"name": "duration_minutes",
17+
"type": "BigInt"
18+
},
19+
{
20+
"name": "impact_level",
21+
"type": "String"
22+
}
23+
],
24+
"key": "outage_id"
25+
},
26+
"trouble_tickets": {
27+
"columns": [
28+
{
29+
"name": "ticket_id",
30+
"type": "String"
31+
},
32+
{
33+
"name": "ticket_created",
34+
"type": "Date"
35+
},
36+
{
37+
"name": "resolution_time",
38+
"type": "BigInt"
39+
},
40+
{
41+
"name": "outage_id",
42+
"type": "String"
43+
},
44+
{
45+
"name": "customer_impact",
46+
"type": "String"
47+
}
48+
],
49+
"key": "ticket_id"
50+
}
51+
},
52+
"relationships": [
53+
{
54+
"name": "ticket_outage",
55+
"from": "trouble_tickets",
56+
"to": "network_outages",
57+
"fromKey": "outage_id",
58+
"toKey": "outage_id"
59+
}
60+
]
61+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
=== DATABASE SCHEMA ===
2+
3+
network_outages(outage_id*:str, outage_start:date, duration_minutes:int, impact_level:str)
4+
trouble_tickets(ticket_id*:str, ticket_created:date, resolution_time:int, outage_id:str, customer_impact:str)
5+
6+
JOINS:
7+
trouble_tickets.outage_id -> network_outages.outage_id
8+
9+
RULES:
10+
- Use T-SQL syntax
11+
- Key columns marked with *
12+
- Types: str=string, int=integer, num=decimal, date=date/datetime
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"_comment": "This file is populated when you run 06_upload_to_search.py",
3+
"index_name": null,
4+
"document_count": 0,
5+
"pdf_files": []
6+
}
1.66 KB
Binary file not shown.
1.75 KB
Binary file not shown.

0 commit comments

Comments
 (0)