tweet_source.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. import json
  2. import requests
  3. import sqlite3
  4. class ArchiveTweetSource:
  5. """
  6. id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_nam
  7. """
  8. def __init__ (self, archive_path, db_path = ".data/tweet.db", archive_user_id = None):
  9. self.archive_path = archive_path
  10. self.user_id = archive_user_id
  11. self.db_path = db_path
  12. return
  13. def get_db (self):
  14. db = sqlite3.connect(self.db_path)
  15. return db
  16. def get_user_timeline (self,
  17. author_id = None, max_results = 10, since_id = None):
  18. if max_results == None:
  19. max_results = -1
  20. sql_params = []
  21. where_sql = []
  22. # if the ID is not stored as a number (eg. string) then this could be a problem
  23. if since_id:
  24. where_sql.append("id > ?")
  25. sql_params.append(since_id)
  26. #if author_id:
  27. # where_sql.append("author_id = ?")
  28. # sql_params.append(author_id)
  29. where_sql = " and ".join(where_sql)
  30. sql_cols = "id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name"
  31. if author_id:
  32. sql_cols += ", '{}' as author_id".format(author_id)
  33. if where_sql:
  34. where_sql = "where {}".format(where_sql)
  35. sql = "select {} from tweet {} order by created_at asc limit ?".format(sql_cols, where_sql)
  36. sql_params.append(max_results)
  37. db = self.get_db()
  38. cur = db.cursor()
  39. cur.row_factory = sqlite3.Row
  40. print(sql)
  41. print(sql_params)
  42. results = list(map(dict, cur.execute(sql, sql_params).fetchall()))
  43. return results
  44. def get_tweet (self, id_):
  45. return self.get_tweets([id_])
  46. def get_tweets (self,
  47. ids):
  48. sql_params = []
  49. where_sql = []
  50. if since_id:
  51. ids_in_list_sql = "id in ({})".format( ','.join(['?'] * len(ids)))
  52. where_sql.append(ids_in_list_sql)
  53. sql_params += ids
  54. where_sql = " and ".join(where_sql)
  55. sql = "select * from tweet where {} limit ?".format(where_sql)
  56. db = self.get_db()
  57. cur = db.cursor()
  58. cur.row_factory = sqlite3.Row
  59. results = list(map(dict, cur.execute(sql, sql_params).fetchall()))
  60. return results
  61. def search_tweets (self,
  62. query,
  63. since_id = None,
  64. max_results = 10,
  65. sort_order = None
  66. ):
  67. return
  68. # https://developer.twitter.com/en/docs/twitter-api/v1/tweets/curate-a-collection/api-reference/get-collections-entries
  69. # we can perhaps steal a token from the TweetDeck Console, otherwise we need to apply for Standard v1.1 / Elevated
  70. class ApiV11TweetCollectionSource:
  71. def __init__ (self, token):
  72. self.token = token
  73. def create_collection (self, name):
  74. return
  75. def bulk_add_to_collection (self, collection_id, items):
  76. return
  77. def add_to_collection (self, collection_id, item):
  78. return
  79. def get_collection_tweets (self, collection_id):
  80. return
  81. class TwitterApiV2SocialGraph:
  82. def __init__ (self, token):
  83. self.token = token
  84. def get_user (user_id, is_username=False):
  85. # GET /2/users/:id
  86. # GET /2/users/by/:username
  87. return
  88. def get_users (user_ids, are_usernames=False):
  89. # GET /2/users/by?usernames=
  90. # GET /2/users?ids=
  91. return
  92. def get_following (self, user_id,
  93. max_results = 1000, pagination_token = None):
  94. # GET /2/users/:id/following
  95. url = "https://api.twitter.com/2/users/{}/following".format(user_id)
  96. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  97. params = {
  98. 'user.fields' : ','.join(user_fields),
  99. 'max_results': max_results
  100. }
  101. if pagination_token:
  102. params['pagination_token'] = pagination_token
  103. headers = {
  104. 'Authorization': 'Bearer {}'.format(self.token),
  105. 'Content-Type': 'application/json'
  106. }
  107. response = requests.get(url, params=params, headers=headers)
  108. result = json.loads(response.text)
  109. return result
  110. return
  111. def get_followers (self, user_id,
  112. max_results = 1000, pagination_token = None):
  113. # GET /2/users/:id/followers
  114. url = "https://api.twitter.com/2/users/{}/followers".format(user_id)
  115. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  116. params = {
  117. 'user.fields' : ','.join(user_fields),
  118. 'max_results': max_results
  119. }
  120. if pagination_token:
  121. params['pagination_token'] = pagination_token
  122. headers = {
  123. 'Authorization': 'Bearer {}'.format(self.token),
  124. 'Content-Type': 'application/json'
  125. }
  126. response = requests.get(url, params=params, headers=headers)
  127. result = json.loads(response.text)
  128. return result
  129. def follow_user (user_id, target_user_id):
  130. # POST /2/users/:id/following
  131. # {target_user_id}
  132. return
  133. def unfollow_user (user_id, target_user_id):
  134. # DELETE /2/users/:source_user_id/following/:target_user_id
  135. return
  136. class ApiV2TweetSource:
  137. def __init__ (self, token):
  138. self.token = token
  139. def create_tweet (self, text,
  140. reply_to_tweet_id = None, quote_tweet_id = None):
  141. url = "https://api.twitter.com/2/tweets"
  142. tweet = {
  143. 'text': text
  144. }
  145. if reply_to_tweet_id:
  146. tweet['reply'] = {
  147. 'in_reply_to_tweet_id': reply_to_tweet_id
  148. }
  149. if quote_tweet_id:
  150. tweet['quote_tweet_id'] = quote_tweet_id
  151. body = json.dumps(tweet)
  152. headers = {
  153. 'Authorization': 'Bearer {}'.format(self.token),
  154. 'Content-Type': 'application/json'
  155. }
  156. response = requests.post(url, data=body, headers=headers)
  157. result = json.loads(response.text)
  158. return result
  159. def retweet (self, tweet_id, user_id):
  160. url = "https://api.twitter.com/2/users/{}/retweets".format(user_id)
  161. retweet = {
  162. 'tweet_id': tweet_id
  163. }
  164. body = json.dumps(retweet)
  165. headers = {
  166. 'Authorization': 'Bearer {}'.format(self.token),
  167. 'Content-Type': 'application/json'
  168. }
  169. response = requests.post(url, data=body, headers=headers)
  170. result = json.loads(response.text)
  171. return result
  172. def bookmark (self, tweet_id, user_id):
  173. url = "https://api.twitter.com/2/users/{}/bookmarks".format(user_id)
  174. bookmark = {
  175. 'tweet_id': tweet_id
  176. }
  177. body = json.dumps(bookmark)
  178. headers = {
  179. 'Authorization': 'Bearer {}'.format(self.token),
  180. 'Content-Type': 'application/json'
  181. }
  182. response = requests.post(url, data=body, headers=headers)
  183. result = json.loads(response.text)
  184. return result
  185. def delete_bookmark (self, tweet_id, user_id):
  186. url = "https://api.twitter.com/2/users/{}/bookmarks/{}".format(user_id, tweet_id)
  187. headers = {
  188. 'Authorization': 'Bearer {}'.format(self.token)
  189. }
  190. response = requests.delete(url, headers=headers)
  191. result = json.loads(response.text)
  192. return result
  193. def get_home_timeline (self, user_id, variant = 'reverse_chronological', max_results = 10, pagination_token = None, since_id = None):
  194. """
  195. Get a user's timeline as viewed by the user themselves.
  196. """
  197. path = 'users/{}/timelines/{}'.format(user_id, variant)
  198. return self.get_timeline(path,
  199. max_results=max_results, pagination_token=pagination_token, since_id=since_id)
  200. def get_timeline (self, path,
  201. max_results = 10, pagination_token = None, since_id = None,
  202. non_public_metrics = False,
  203. exclude_replies=False,
  204. exclude_retweets=False):
  205. """
  206. Get any timeline, including custom curated timelines built by Tweet Deck / ApiV11.
  207. """
  208. token = self.token
  209. url = "https://api.twitter.com/2/{}".format(path)
  210. tweet_fields = ["created_at", "conversation_id", "referenced_tweets", "text", "public_metrics", "entities", "attachments"]
  211. media_fields = ["alt_text", "type", "preview_image_url", "public_metrics", "url", "media_key", "duration_ms", "width", "height", "variants"]
  212. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  213. expansions = ["entities.mentions.username",
  214. "attachments.media_keys",
  215. "author_id",
  216. "referenced_tweets.id",
  217. "referenced_tweets.id.author_id"]
  218. if non_public_metrics:
  219. tweet_fields.append("non_public_metrics")
  220. media_fields.append("non_public_metrics")
  221. params = {
  222. "expansions": ",".join(expansions),
  223. "media.fields": ",".join(media_fields),
  224. "tweet.fields": ",".join(tweet_fields),
  225. "user.fields": ",".join(user_fields),
  226. "max_results": max_results,
  227. }
  228. exclude = []
  229. if exclude_replies:
  230. exclude.append('replies')
  231. if exclude_retweets:
  232. exclude.append('retweets')
  233. if len(exclude):
  234. params['exclude'] = ','.join(exclude)
  235. if pagination_token:
  236. params['pagination_token'] = pagination_token
  237. if since_id:
  238. params['since_id'] = since_id
  239. headers = {"Authorization": "Bearer {}".format(token)}
  240. #headers = {"Authorization": "access_token {}".format(access_token)}
  241. response = requests.get(url, params=params, headers=headers)
  242. response_json = json.loads(response.text)
  243. return response_json
  244. def get_mentions_timeline (self, user_id,
  245. max_results = 10, pagination_token = None, since_id = None):
  246. path = "users/{}/mentions".format(user_id)
  247. return self.get_timeline(path,
  248. max_results=max_results, pagination_token=pagination_token, since_id=since_id)
  249. def get_user_timeline (self, user_id,
  250. max_results = 10, pagination_token = None, since_id = None,
  251. non_public_metrics=False,
  252. exclude_replies=False,
  253. exclude_retweets=False):
  254. """
  255. Get a user's Tweets as viewed by another.
  256. """
  257. path = "users/{}/tweets".format(user_id)
  258. return self.get_timeline(path,
  259. max_results=max_results, pagination_token=pagination_token, since_id=since_id,
  260. non_public_metrics = non_public_metrics,
  261. exclude_replies=exclude_replies, exclude_retweets=exclude_retweets)
  262. def get_tweet (self, id_, non_public_metrics = False):
  263. return self.get_tweets([id_], non_public_metrics = non_public_metrics)
  264. def get_tweets (self,
  265. ids,
  266. non_public_metrics = False):
  267. token = self.token
  268. url = "https://api.twitter.com/2/tweets"
  269. tweet_fields = ["created_at", "conversation_id", "referenced_tweets", "text", "public_metrics", "entities", "attachments"]
  270. media_fields = ["alt_text", "type", "preview_image_url", "public_metrics", "url", "media_key", "duration_ms", "width", "height", "variants"]
  271. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  272. expansions = ["entities.mentions.username",
  273. "attachments.media_keys",
  274. "author_id",
  275. "referenced_tweets.id",
  276. "referenced_tweets.id.author_id"]
  277. if non_public_metrics:
  278. tweet_fields.append("non_public_metrics")
  279. media_fields.append("non_public_metrics")
  280. params = {
  281. "ids": ','.join(ids),
  282. "expansions": ",".join(expansions),
  283. "media.fields": ",".join(media_fields),
  284. "tweet.fields": ",".join(tweet_fields),
  285. "user.fields": ",".join(user_fields)
  286. }
  287. headers = {"Authorization": "Bearer {}".format(token)}
  288. #print(params)
  289. response = requests.get(url, params=params, headers=headers)
  290. response_json = json.loads(response.text)
  291. return response_json
  292. def search_tweets (self,
  293. query,
  294. pagination_token = None,
  295. since_id = None,
  296. max_results = 10,
  297. sort_order = None,
  298. non_public_metrics = False
  299. ):
  300. token = self.token
  301. url = "https://api.twitter.com/2/tweets/search/recent"
  302. tweet_fields = ["created_at", "conversation_id", "referenced_tweets", "text", "public_metrics", "entities", "attachments"]
  303. media_fields = ["alt_text", "type", "preview_image_url", "public_metrics", "url", "media_key", "duration_ms", "width", "height", "variants"]
  304. user_fields = ["created_at", "name", "username", "location", "profile_image_url", "verified"]
  305. expansions = ["entities.mentions.username",
  306. "attachments.media_keys",
  307. "author_id",
  308. "referenced_tweets.id",
  309. "referenced_tweets.id.author_id"]
  310. if non_public_metrics:
  311. tweet_fields.append("non_public_metrics")
  312. media_fields.append("non_public_metrics")
  313. params = {
  314. "expansions": ",".join(expansions),
  315. "media.fields": ",".join(media_fields),
  316. "tweet.fields": ",".join(tweet_fields),
  317. "user.fields": ",".join(user_fields),
  318. "query": query,
  319. "max_results": max_results,
  320. }
  321. if pagination_token:
  322. params['pagination_token'] = pagination_token
  323. if since_id:
  324. params['since_id'] = since_id
  325. if sort_order:
  326. params['sort_order'] = sort_order
  327. headers = {"Authorization": "Bearer {}".format(token)}
  328. response = requests.get(url, params=params, headers=headers)
  329. response_json = json.loads(response.text)
  330. return response_json
  331. def count_tweets (self,
  332. query,
  333. since_id = None,
  334. granularity = 'hour'
  335. ):
  336. token = self.token
  337. url = "https://api.twitter.com/2/tweets/counts/recent"
  338. params = {
  339. "query": query
  340. }
  341. if since_id:
  342. params['since_id'] = since_id
  343. headers = {"Authorization": "Bearer {}".format(token)}
  344. response = requests.get(url, params=params, headers=headers)
  345. print(response.status_code)
  346. print(response.text)
  347. response_json = json.loads(response.text)
  348. return response_json
  349. #def get_conversation (self, tweet_id, pagination_token = None,
  350. # TODO
  351. def get_thread (self, tweet_id,
  352. author_id = None,
  353. only_replies = False,
  354. pagination_token = None,
  355. since_id = None,
  356. max_results = 10,
  357. sort_order = None
  358. ):
  359. # FIXME author_id can be determined from a Tweet object
  360. query = ""
  361. if author_id:
  362. query += " from:{}".format(author_id)
  363. if only_replies:
  364. query += " in_reply_to_tweet_id:{}".format(tweet_id)
  365. else:
  366. query += " conversation_id:{}".format(tweet_id)
  367. print("get_thread query=" + query)
  368. return self.search_tweets(query,
  369. pagination_token = pagination_token, since_id = since_id, max_results = max_results, sort_order = sort_order)
  370. def get_bookmarks (self, user_id,
  371. max_results = 10, pagination_token = None, since_id = None):
  372. path = "users/{}/bookmarks".format(user_id)
  373. return self.get_timeline(path,
  374. max_results=max_results, pagination_token=pagination_token, since_id=since_id)
  375. def get_media_tweets (self,
  376. author_id = None,
  377. has_media = True,
  378. has_links = None,
  379. has_images = None,
  380. has_videos = None,
  381. is_reply = None,
  382. is_retweet = None,
  383. pagination_token = None,
  384. since_id = None,
  385. max_results = 10,
  386. sort_order = None
  387. ):
  388. # FIXME author_id can be determined from a Tweet object
  389. query = ""
  390. if has_media != None:
  391. if not has_media:
  392. query += "-"
  393. query += "has:media "
  394. if has_links != None:
  395. if not has_links:
  396. query += " -"
  397. query += "has:links "
  398. if has_images != None:
  399. if not has_images:
  400. query += " -"
  401. query += "has:images "
  402. if has_videos != None:
  403. if not has_videos:
  404. query += " -"
  405. query += "has:videos "
  406. if is_reply != None:
  407. if not is_reply:
  408. query += " -"
  409. query += "is:reply "
  410. if is_retweet != None:
  411. if not is_retweet:
  412. query += " -"
  413. query += "is:retweet "
  414. if author_id:
  415. query += "from:{} ".format(author_id)
  416. return self.search_tweets(query,
  417. pagination_token = pagination_token, since_id = since_id, max_results = max_results, sort_order = sort_order)
  418. def get_retweets (self, tweet_id):
  419. # GET /2/tweets/:id/retweeted_by
  420. return
  421. def get_quote_tweets( self, tweet_id):
  422. # GET /2/tweets/:id/quote_tweets
  423. return
  424. def get_likes (self, user_id,
  425. max_results = 10, pagination_token = None, since_id = None):
  426. # GET /2/users/:id/liked_tweets
  427. path = "users/{}/liked_tweets".format(user_id)
  428. return self.get_timeline(path,
  429. max_results=max_results, pagination_token=pagination_token, since_id=since_id)
  430. def get_liked_by (self, tweet_id):
  431. # GET /2/tweets/:id/liking_users
  432. return
  433. def get_list_tweets (self, list_id):
  434. # GET /2/lists/:id/tweets
  435. return