-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsp_PythonTest.sql
More file actions
146 lines (118 loc) · 4.89 KB
/
sp_PythonTest.sql
File metadata and controls
146 lines (118 loc) · 4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
IF OBJECT_ID('dbo.sp_PythonTest') IS NULL
EXEC ('CREATE PROCEDURE dbo.sp_PythonTest AS RETURN 0;');
GO
ALTER PROCEDURE [dbo].[sp_PythonTest]
AS
/* Making a table to store our subreddit list in */
IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'py_SubredditList')
BEGIN
CREATE TABLE py_SubredditList (Subreddit varchar(50), Hits bigint)
END
/* We're pulling data from these subreddits, add your own if you want to */
IF OBJECT_ID('tempdb..#SubredditList') IS NOT NULL DROP TABLE #SubredditList
CREATE TABLE #SubredditList (Subreddit varchar(50), Hits int)
INSERT INTO #SubredditList (Subreddit, Hits)
VALUES
('nosleep',0)
,('LegalAdvice',0)
,('WritingPrompts',0)
,('TalesFromTechSupport',0)
,('TalesFromRetail',0)
,('TalesFromYourServer',0)
,('thephenomenon',0)
INSERT INTO py_SubredditList (Subreddit, Hits)
SELECT t.Subreddit, t.Hits
FROM #SubredditList t
WHERE NOT EXISTS (SELECT * FROM py_SubredditList sl WHERE t.Subreddit = t.Subreddit)
/* Find the least used subreddit */
DECLARE @SubReddit varchar(50)
SET @SubReddit = (SELECT TOP 1 sr.Subreddit
FROM py_SubredditList sr
WHERE sr.Hits = (SELECT MIN(Hits) FROM py_SubredditList))
/* Here's our block of Python, declaring it dynamically */
DECLARE @sql nvarchar(max) =
N'#! usr/bin/env python3
import praw
import pandas as pd
import datetime as dt
reddit = praw.Reddit(client_id=''PersonalUseScript-14Characters'', \
client_secret=''Secret-27Characters'', \
user_agent=''YourAPIName'', \
username=''YourRedditUsername'', \
password=''YourRedditPassword'')
subreddit = reddit.subreddit(''' + @SubReddit + ''')
for submission in subreddit.top(limit=1):
print(submission.title, submission.id)
top_subreddit = subreddit.new(limit=1000)
topics_dict = {
"title":[],
"score":[],
"id":[],
"url":[],
"comms_num": [],
"created": [],
"body":[]
}
for submission in top_subreddit:
topics_dict["title"].append(submission.title)
topics_dict["score"].append(submission.score)
topics_dict["id"].append(submission.id)
topics_dict["url"].append(submission.url)
topics_dict["comms_num"].append(submission.num_comments)
topics_dict["created"].append(submission.created)
topics_dict["body"].append(submission.selftext)
topics_data = pd.DataFrame(topics_dict)
def get_date(created):
return dt.datetime.fromtimestamp(created)
_timestamp = topics_data["created"].apply(get_date)
topics_data = topics_data.assign(timestamp = _timestamp)
#print(topics_data.body)
OutputDataSet = topics_data
'
IF OBJECT_ID('tempdb..#RedditData') IS NOT NULL DROP TABLE #RedditData
BEGIN
CREATE TABLE #RedditData
(
[body] varchar(max)
,[score] varchar(max)
,[id] varchar(max)
,[url] varchar(max)
,[comms_num] varchar(max)
,[title] varchar(max)
,[url2] varchar(max)
,[created] varchar(max)
)
END
INSERT INTO #RedditData
EXEC sp_execute_external_script @language = N'Python', @script = @sql
/* Increment our subreddit list */
UPDATE py_SubredditList SET Hits = Hits + 1 WHERE Subreddit = @SubReddit
IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'py_SampleData')
BEGIN
CREATE TABLE py_SampleData
(
[SampleData] varchar(max)
,[Description] varchar(20)
,[DataType] varchar(20)
,[DataLen] int
,[Subreddit] varchar(20)
)
END
INSERT INTO py_SampleData (SampleData, Description, DataType, DataLen, Subreddit)
SELECT TRIM(title) AS SampleData, 'title' AS [Description], 'varchar' AS [DataType], LEN(title) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE title IS NOT NULL
UNION ALL
SELECT TRIM(score) AS SampleData, 'score' AS [Description], 'int' AS [DataType], LEN(score) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE score IS NOT NULL
UNION ALL
SELECT TRIM(id) AS SampleData, 'id' AS [Description], 'varchar' AS [DataType], LEN(id) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE id IS NOT NULL
UNION ALL
SELECT TRIM(url) AS SampleData, 'url' AS [Description], 'varchar' AS [DataType], LEN(url) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE url IS NOT NULL
UNION ALL
SELECT TRIM(comms_num) AS SampleData, 'comms_num' AS [Description], 'int' AS [DataType], LEN(comms_num) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE comms_num IS NOT NULL
UNION ALL
SELECT TRIM(body) AS SampleData, 'body' AS [Description], 'varchar' AS [DataType], LEN(body) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE body IS NOT NULL
UNION ALL
SELECT TRIM(url2) AS SampleData, 'url2' AS [Description], 'varchar' AS [DataType], LEN(url2) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE url2 IS NOT NULL
UNION ALL
SELECT TRIM(created) AS SampleData, 'created' AS [Description], 'datetime' AS [DataType], LEN(created) AS [DataLen], @SubReddit AS SubReddit FROM #RedditData WHERE created IS NOT NULL
SELECT * FROM #RedditData
GO