# tweet_source.py

  1. import json
  2. import requests
  3. import sqlite3
  4. class ArchiveTweetSource:
  5. """
  6. id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_nam
  7. """
  8. def __init__ (self, archive_path, db_path = "data/tweet.db", archive_user_id = None):
  9. self.archive_path = archive_path
  10. self.user_id = archive_user_id
  11. self.db_path = db_path
  12. return
  13. def get_db (self):
  14. db = sqlite3.connect(self.db_path)
  15. return db
  16. def get_user_timeline (self,
  17. author_id = None, max_results = 10, since_id = None):
  18. if max_results == None:
  19. max_results = -1
  20. sql_params = []
  21. where_sql = []
  22. # if the ID is not stored as a number (eg. string) then this could be a problem
  23. if since_id:
  24. where_sql.append("id > ?")
  25. sql_params.append(since_id)
  26. #if author_id:
  27. # where_sql.append("author_id = ?")
  28. # sql_params.append(author_id)
  29. where_sql = " and ".join(where_sql)
  30. sql_cols = "id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name"
  31. if author_id:
  32. sql_cols += ", '{}' as author_id".format(author_id)
  33. if where_sql:
  34. where_sql = "where {}".format(where_sql)
  35. sql = "select {} from tweet {} order by created_at asc limit ?".format(sql_cols, where_sql)
  36. sql_params.append(max_results)
  37. db = self.get_db()
  38. cur = db.cursor()
  39. cur.row_factory = sqlite3.Row
  40. print(sql)
  41. print(sql_params)
  42. results = list(map(dict, cur.execute(sql, sql_params).fetchall()))
  43. return results
  44. def get_tweet (self, id_):
  45. return self.get_tweets([id_])
  46. def get_tweets (self,
  47. ids):
  48. sql_params = []
  49. where_sql = []
  50. if since_id:
  51. ids_in_list_sql = "id in ({})".format( ','.join(['?'] * len(ids)))
  52. where_sql.append(ids_in_list_sql)
  53. sql_params += ids
  54. where_sql = " and ".join(where_sql)
  55. sql = "select * from tweet where {} limit ?".format(where_sql)
  56. db = self.get_db()
  57. cur = db.cursor()
  58. cur.row_factory = sqlite3.Row
  59. results = list(map(dict, cur.execute(sql, sql_params).fetchall()))
  60. return results
  61. def search_tweets (self,
  62. query,
  63. since_id = None,
  64. max_results = 10,
  65. sort_order = None
  66. ):
  67. return
# Collections API reference:
# https://developer.twitter.com/en/docs/twitter-api/v1/tweets/curate-a-collection/api-reference/get-collections-entries
# We can perhaps borrow a token from the TweetDeck console; otherwise we need to apply for Standard v1.1 / Elevated access.
  70. class ApiV11TweetCollectionSource:
  71. def __init__ (self, token):
  72. self.token = token
  73. def create_collection (self, name):
  74. return
  75. def bulk_add_to_collection (self, collection_id, items):
  76. return
  77. def add_to_collection (self, collection_id, item):
  78. return
  79. def get_collection_tweets (self, collection_id):
  80. return
  81. class TwitterApiV2SocialGraph:
  82. def __init__ (self, token):
  83. self.token = token
  84. def get_user (user_id, is_username=False):
  85. # GET /2/users/:id
  86. # GET /2/users/by/:username
  87. return
  88. def get_users (user_ids, are_usernames=False):
  89. # GET /2/users/by?usernames=
  90. # GET /2/users?ids=
  91. return
  92. def get_following (user_id,
  93. max_results = 10, pagination_token = None):
  94. # GET /2/users/:id/following
  95. return
  96. def get_followers (user_id,
  97. max_results = 10, pagination_token = None):
  98. # GET /2/users/:id/followers
  99. return
  100. def follow_user (user_id, target_user_id):
  101. # POST /2/users/:id/following
  102. # {target_user_id}
  103. return
  104. def unfollow_user (user_id, target_user_id):
  105. # DELETE /2/users/:source_user_id/following/:target_user_id
  106. return
  107. class ApiV2TweetSource:
  108. def __init__ (self, token):
  109. self.token = token
  110. def create_tweet (self, text,
  111. reply_to_tweet_id = None, quote_tweet_id = None):
  112. url = "https://api.twitter.com/2/tweets"
  113. tweet = {
  114. 'text': text
  115. }
  116. if reply_to_tweet_id:
  117. tweet['reply'] = {
  118. 'in_reply_to_tweet_id': reply_to_tweet_id
  119. }
  120. if quote_tweet_id:
  121. tweet['quote_tweet_id'] = quote_tweet_id
  122. body = json.dumps(tweet)
  123. headers = {
  124. 'Authorization': 'Bearer {}'.format(self.token),
  125. 'Content-Type': 'application/json'
  126. }
  127. response = requests.post(url, data=body, headers=headers)
  128. result = json.loads(response.text)
  129. return result
  130. def retweet_tweet( self, user_id, tweet_id ):
  131. url = "https://api.twitter.com/2/users/{}/retweets".format(user_id)
  132. retweet = {
  133. 'tweet_id': tweet_id
  134. }
  135. body = json.dumps(retweet)
  136. headers = {
  137. 'Authorization': 'Bearer {}'.format(self.token),
  138. 'Content-Type': 'application/json'
  139. }
  140. response = requests.post(url, data=body, headers=headers)
  141. result = json.loads(response.text)
  142. return result
  143. def get_home_timeline (self, user_id, variant = 'reverse_chronological', max_results = 10, pagination_token = None, since_id = None):
  144. """
  145. Get a user's timeline as viewed by the user themselves.
  146. """
  147. path = 'users/{}/timelines/{}'.format(user_id, variant)
  148. return self.get_timeline(path,
  149. max_results=max_results, pagination_token=pagination_token, since_id=since_id)
  150. def get_timeline (self, path,
  151. max_results = 10, pagination_token = None, since_id = None,
  152. non_public_metrics = False,
  153. exclude_replies=False):
  154. """
  155. Get any timeline, including custom curated timelines built by Tweet Deck / ApiV11.
  156. """
  157. token = self.token
  158. url = "https://api.twitter.com/2/{}".format(path)
  159. tweet_fields = ["created_at", "conversation_id", "referenced_tweets", "text", "public_metrics", "entities", "attachments"]
  160. media_fields = ["alt_text", "type", "preview_image_url", "public_metrics", "url", "media_key", "duration_ms", "width", "height", "variants"]
  161. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  162. expansions = ["entities.mentions.username",
  163. "attachments.media_keys",
  164. "author_id",
  165. "referenced_tweets.id",
  166. "referenced_tweets.id.author_id"]
  167. if non_public_metrics:
  168. tweet_fields.append("non_public_metrics")
  169. media_fields.append("non_public_metrics")
  170. params = {
  171. "expansions": ",".join(expansions),
  172. "media.fields": ",".join(media_fields),
  173. "tweet.fields": ",".join(tweet_fields),
  174. "user.fields": ",".join(user_fields),
  175. "max_results": max_results,
  176. }
  177. exclude = []
  178. if exclude_replies:
  179. exclude.append('replies')
  180. if len(exclude):
  181. params['exclude'] = exclude
  182. if pagination_token:
  183. params['pagination_token'] = pagination_token
  184. if since_id:
  185. params['since_id'] = since_id
  186. headers = {"Authorization": "Bearer {}".format(token)}
  187. #headers = {"Authorization": "access_token {}".format(access_token)}
  188. response = requests.get(url, params=params, headers=headers)
  189. response_json = json.loads(response.text)
  190. return response_json
  191. def get_mentions_timeline (self, user_id,
  192. max_results = 10, pagination_token = None, since_id = None):
  193. path = "users/{}/mentions".format(user_id)
  194. return self.get_timeline(path,
  195. max_results=max_results, pagination_token=pagination_token, since_id=since_id)
  196. def get_user_timeline (self, user_id,
  197. max_results = 10, pagination_token = None, since_id = None,
  198. non_public_metrics=False,
  199. exclude_replies=False):
  200. """
  201. Get a user's Tweets as viewed by another.
  202. """
  203. path = "users/{}/tweets".format(user_id)
  204. return self.get_timeline(path,
  205. max_results=max_results, pagination_token=pagination_token, since_id=since_id,
  206. non_public_metrics = non_public_metrics,
  207. exclude_replies=exclude_replies)
  208. def get_tweet (self, id_, non_public_metrics = False):
  209. return self.get_tweets([id_], non_public_metrics = non_public_metrics)
  210. def get_tweets (self,
  211. ids,
  212. non_public_metrics = False):
  213. token = self.token
  214. url = "https://api.twitter.com/2/tweets"
  215. tweet_fields = ["created_at", "conversation_id", "referenced_tweets", "text", "public_metrics", "entities", "attachments"]
  216. media_fields = ["alt_text", "type", "preview_image_url", "public_metrics", "url", "media_key", "duration_ms", "width", "height", "variants"]
  217. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  218. expansions = ["entities.mentions.username",
  219. "attachments.media_keys",
  220. "author_id",
  221. "referenced_tweets.id",
  222. "referenced_tweets.id.author_id"]
  223. if non_public_metrics:
  224. tweet_fields.append("non_public_metrics")
  225. media_fields.append("non_public_metrics")
  226. params = {
  227. "ids": ','.join(ids),
  228. "expansions": ",".join(expansions),
  229. "media.fields": ",".join(media_fields),
  230. "tweet.fields": ",".join(tweet_fields),
  231. "user.fields": ",".join(user_fields)
  232. }
  233. headers = {"Authorization": "Bearer {}".format(token)}
  234. response = requests.get(url, params=params, headers=headers)
  235. response_json = json.loads(response.text)
  236. return response_json
  237. def search_tweets (self,
  238. query,
  239. pagination_token = None,
  240. since_id = None,
  241. max_results = 10,
  242. sort_order = None,
  243. non_public_metrics = False
  244. ):
  245. token = self.token
  246. url = "https://api.twitter.com/2/tweets/search/recent"
  247. tweet_fields = ["created_at", "conversation_id", "referenced_tweets", "text", "public_metrics", "entities", "attachments"]
  248. media_fields = ["alt_text", "type", "preview_image_url", "public_metrics", "url", "media_key", "duration_ms", "width", "height", "variants"]
  249. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  250. expansions = ["entities.mentions.username",
  251. "attachments.media_keys",
  252. "author_id",
  253. "referenced_tweets.id",
  254. "referenced_tweets.id.author_id"]
  255. if non_public_metrics:
  256. tweet_fields.append("non_public_metrics")
  257. media_fields.append("non_public_metrics")
  258. params = {
  259. "expansions": ",".join(expansions),
  260. "media.fields": ",".join(media_fields),
  261. "tweet.fields": ",".join(tweet_fields),
  262. "user.fields": ",".join(user_fields),
  263. "query": query,
  264. "max_results": max_results,
  265. }
  266. if pagination_token:
  267. params['pagination_token'] = pagination_token
  268. if since_id:
  269. params['since_id'] = since_id
  270. if sort_order:
  271. params['sort_order'] = sort_order
  272. headers = {"Authorization": "Bearer {}".format(token)}
  273. response = requests.get(url, params=params, headers=headers)
  274. response_json = json.loads(response.text)
  275. return response_json
  276. def count_tweets (self,
  277. query,
  278. since_id = None,
  279. granularity = 'hour'
  280. ):
  281. token = self.token
  282. url = "https://api.twitter.com/2/tweets/counts/recent"
  283. params = {
  284. "query": query
  285. }
  286. if since_id:
  287. params['since_id'] = since_id
  288. headers = {"Authorization": "Bearer {}".format(token)}
  289. response = requests.get(url, params=params, headers=headers)
  290. print(response.status_code)
  291. print(response.text)
  292. response_json = json.loads(response.text)
  293. return response_json
  294. #def get_conversation (self, tweet_id, pagination_token = None,
  295. # TODO
  296. def get_thread (self, tweet_id,
  297. author_id = None,
  298. pagination_token = None,
  299. since_id = None,
  300. max_results = 10,
  301. sort_order = None
  302. ):
  303. # FIXME author_id can be determined from a Tweet object
  304. query = "conversation_id:{}".format(tweet_id)
  305. if author_id:
  306. query += " from:{}".format(author_id)
  307. return self.search_tweets(query,
  308. pagination_token = pagination_token, since_id = since_id, max_results = max_results, sort_order = sort_order)
  309. def get_bookmarks (self, user_id,
  310. max_results = 10, pagination_token = None, since_id = None):
  311. path = "users/{}/bookmarks".format(user_id)
  312. return self.get_timeline(path,
  313. max_results=max_results, pagination_token=pagination_token, since_id=since_id)
  314. def get_media_tweets (self,
  315. author_id = None,
  316. has_media = True,
  317. has_links = None,
  318. has_images = None,
  319. has_videos = None,
  320. pagination_token = None,
  321. since_id = None,
  322. max_results = 10,
  323. sort_order = None
  324. ):
  325. # FIXME author_id can be determined from a Tweet object
  326. query = ""
  327. if has_media != None:
  328. if not has_media:
  329. query += "-"
  330. query += "has:media "
  331. if has_links != None:
  332. if not has_links:
  333. query += " -"
  334. query += "has:links "
  335. if has_images != None:
  336. if not has_images:
  337. query += " -"
  338. query += "has:images "
  339. if has_videos != None:
  340. if not has_videos:
  341. query += " -"
  342. query += "has:videos "
  343. if author_id:
  344. query += "from:{} ".format(author_id)
  345. return self.search_tweets(query,
  346. pagination_token = pagination_token, since_id = since_id, max_results = max_results, sort_order = sort_order)
  347. def get_retweets (self, tweet_id):
  348. # GET /2/tweets/:id/retweeted_by
  349. return
  350. def get_quote_tweets( self, tweet_id):
  351. # GET /2/tweets/:id/quote_tweets
  352. return
  353. def get_likes (self, tweet_id):
  354. # GET /2/tweets/:id/liking_users
  355. return
  356. def get_liked_by (self, user_id):
  357. # GET /2/users/:id/liked_tweets
  358. return
  359. def get_list_tweets (self, list_id):
  360. # GET /2/lists/:id/tweets
  361. return