twitter_archive_facade.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. from configparser import ConfigParser
  2. import base64
  3. from flask import Flask, json, Response, render_template, request, send_from_directory, Blueprint, url_for, g
  4. from flask_cors import CORS
  5. import sqlite3
  6. import os
  7. import json
  8. import json_stream
  9. from zipfile import ZipFile
  10. import itertools
  11. import datetime
  12. import dateutil
  13. import dateutil.parser
  14. import dateutil.tz
  15. import requests
  16. from tweet_source import ArchiveTweetSource
  17. ARCHIVE_TWEETS_PATH=os.environ.get('ARCHIVE_TWEETS_PATH', '.data/tweets.json')
  18. twitter_app = Blueprint('twitter_archive_facade', 'twitter_archive_facade',
  19. static_folder='static',
  20. static_url_path='',
  21. url_prefix='/')
  22. @twitter_app.before_request
  23. def add_me ():
  24. #if me.startswith('twitter') and me in session:
  25. #g.twitter_user = {'id': '0'}
  26. return
  27. @twitter_app.context_processor
  28. def inject_me():
  29. #return {'twitter_user': g.twitter_user}
  30. return {}
  31. # ---------------------------------------------------------------------------------------------------------
  32. # ---------------------------------------------------------------------------------------------------------
  33. # Tweet Archive and old tests
  34. # ---------------------------------------------------------------------------------------------------------
  35. # ---------------------------------------------------------------------------------------------------------
  36. # https://stackoverflow.com/questions/48218065/programmingerror-sqlite-objects-created-in-a-thread-can-only-be-used-in-that-sa
  37. db = sqlite3.connect(":memory:", check_same_thread=False)
  38. db_need_init = True
  39. if db_need_init:
  40. print("Creating tweet db...")
  41. db.execute("create table tweet (id, created_at, content)")
  42. def tweets_js_to_json (path, to_path):
  43. # open JS file provided in archive and convert it to JSON
  44. # string manipulation should be enough
  45. return True
  46. def populate_tweetsdb_from_compressed_json (db, tweets_json_path):
  47. # perf: we should find a batch size for executemany if this is too slow.
  48. # https://stackoverflow.com/questions/43785569/for-loop-or-executemany-python-and-sqlite3
  49. ti = open(tweets_json_path)
  50. data = json_stream.load(ti)
  51. for tweet in data.persistent():
  52. reply = None
  53. if "reply" in tweet:
  54. reply = tweet["reply"]
  55. values = [tweet["id"], tweet["full_text_length"], tweet["date"], reply]
  56. db.execute("insert into tweet (id, full_text_length, date, reply) values (?, ?, ?, ?)", values)
  57. ti.close()
  58. return True
  59. def print_retweets (tweets_path):
  60. tweets_file = open(tweets_path, 'rt', encoding='utf-8')
  61. tweets_data = json_stream.load(tweets_file)
  62. print('[')
  63. for t in tweets_data:
  64. tweet = t.persistent()['tweet']
  65. if int(tweet['retweet_count']) > 1:
  66. print(json.dumps({'id': tweet['id'], 'x': tweet['created_at'], 'y': tweet['retweet_count']}) + ',')
  67. print(']')
  68. tweets_file.close()
  69. return True
  70. def tweet_to_actpub (t):
  71. return t
  72. @twitter_app.route('/tweets/isd', methods=['GET'])
  73. def get_tweets_isd ():
  74. # simulate GraphQL conventions with REST:
  75. # created_at[gte]=
  76. # created_at[lte]=
  77. # author=
  78. # content[re]=
  79. # expansions=media,...
  80. #results = langs_con.execute("select rowid, id, created_at, content from tweet").fetchall()
  81. #return Response(json.dumps(results), mimetype='application/json')
  82. return send_from_directory('data', 'tweets-ispoogedaily.json')
  83. @twitter_app.route('/tweets/storms', methods=['GET'])
  84. def get_tweet_storms ():
  85. #content = open('data/storm-summaries-2021.json').read()
  86. #return Response(content, mimetype='application/json')
  87. return send_from_directory('data', 'storm-summaries-2021.json')
  88. @twitter_app.route('/bookmarks', methods=['GET'])
  89. def get_bookmarks ():
  90. #content = open('data/storm-summaries-2021.json').read()
  91. #return Response(content, mimetype='application/json')
  92. return send_from_directory('data', 'bookmarks-ispoogedaily.json')
  93. @twitter_app.route('/timeline', methods=['GET'])
  94. def get_timeline ():
  95. #content = open('data/storm-summaries-2021.json').read()
  96. #return Response(content, mimetype='application/json')
  97. return send_from_directory('data', 'timeline-minimal.json')
  98. @twitter_app.route('/tweets/compressed', methods=['POST'])
  99. def post_tweets_compressed ():
  100. db_exists = os.path.exists("tweets.db")
  101. if not db_exists:
  102. db = sqlite3.connect("tweets.db")
  103. db.execute("create table tweet (id, full_text_length, date, reply)")
  104. populate_tweetsdb_from_compressed_json(db, ".data/tweet-items.json")
  105. db.commit()
  106. db.close()
  107. #content = open('data/storm-summaries-2021.json').read()
  108. #return Response(content, mimetype='application/json')
  109. return Response("ok")
  110. tweets_form_meta_data = {
  111. 'fields': [
  112. {'name': 'id'},
  113. {'name': 'created_at', 'type': 'date'},
  114. {'name': 'retweeted', 'type': 'boolean'},
  115. {'name': 'favorited', 'type': 'boolean'},
  116. {'name': 'retweet_count', 'type': 'int'},
  117. {'name': 'favorite_count', 'type': 'int'},
  118. {'name': 'full_text', 'type': 'string', 'searchable': True},
  119. {'name': 'in_reply_to_status_id_str', 'type': 'string'},
  120. {'name': 'in_reply_to_user_id', 'type': 'string'},
  121. {'name': 'in_reply_to_screen_name', 'type': 'string'}
  122. ],
  123. 'id': 'id',
  124. 'root': 'tweets',
  125. 'url': '/tweets/search',
  126. 'access': ['read']
  127. }
  128. @twitter_app.route('/tweets/form', methods=['GET'])
  129. def get_tweets_form ():
  130. response_body = {
  131. 'metaData': tweets_form_meta_data
  132. }
  133. return Response(json.dumps(response_body), mimetype="application/json")
  134. def db_tweet_to_card (tweet):
  135. user = {'username': 'ispoogedaily', 'id': '14520320'}
  136. tweet_url = 'https://twitter.com/{}/status/{}'.format(user['username'], tweet['id'])
  137. content = tweet['full_text'] + "\n\n[view tweet]({})".format(tweet_url)
  138. card = {
  139. 'id': 'tweet-' + tweet['id'],
  140. 'content': content,
  141. 'content_type': 'text/plain',
  142. 'created_at': tweet['created_at'],
  143. 'modified_at': None,
  144. 'title': '@' + user['username'] + ' at ' + tweet['created_at'],
  145. 'content_source': tweet_url,
  146. #'tweet': tweet,
  147. #'user': user
  148. }
  149. return card
  150. # tweetStore = new Ext.data.JsonStore({'url': 'http://localhost:5004/tweets/search.rows.json', 'autoLoad': true})
  151. def tweet_model (tweet_data):
  152. # retweeted_by, avi_icon_url, display_name, handle, created_at, text
  153. """
  154. {"id": "797839193", "created_at": "2008-04-27T04:00:27", "retweeted": 0, "favorited": 0, "retweet_count": "0", "favorite_count": "0", "full_text": "Putting pizza on. Come over any time!", "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "author_id": "14520320"}, {"id": "797849979", "created_at": "2008-04-27T04:27:46", "retweeted": 0, "favorited": 0, "retweet_count": "0", "favorite_count": "0", "full_text": "hijacked!@!!!", "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "author_id": "14520320"}
  155. """
  156. t = {
  157. 'id': tweet_data['id'],
  158. 'text': tweet_data['full_text'],
  159. 'created_at': tweet_data['created_at'],
  160. 'author_is_verified': False,
  161. 'conversation_id': tweet_data['id'],
  162. 'avi_icon_url': '',
  163. 'display_name': 'Archive User',
  164. 'handle': '!archive',
  165. 'author_url': url_for('.get_profile_html', user_id='0'),
  166. 'author_id': '0',
  167. 'source_url': '!source_url',
  168. 'source_author_url': '!source_author_url',
  169. #'is_edited': len(tweet_data['edit_history_tweet_ids']) > 1
  170. }
  171. t['public_metrics'] = {
  172. 'like_count': tweet_data['favorite_count'],
  173. 'retweet_count': tweet_data['retweet_count'],
  174. 'reply_count': 0,
  175. 'quote_count': 0
  176. }
  177. return t
  178. @twitter_app.route('/data/timeline/user/<user_id>')
  179. def get_data_timeline_user (user_id):
  180. pagination_token = request.args.get('pagination_token') # since_id
  181. #exclude_replies = request.args.get('exclude_replies')
  182. #is_me = user_id == twitter['id']
  183. tweet_source = ArchiveTweetSource(ARCHIVE_TWEETS_PATH)
  184. db_tweets = tweet_source.get_user_timeline(author_id = user_id,
  185. since_id = pagination_token,
  186. #exclude_replies = exclude_replies == '1'
  187. )
  188. tweets = list(map(tweet_model, db_tweets))
  189. next_token = db_tweets[-1]['id']
  190. query = {}
  191. if next_token:
  192. query = {
  193. **query,
  194. 'next_data_url': url_for('.get_data_timeline_user', user_id=user_id, pagination_token=next_token)
  195. }
  196. if 'HX-Request' in request.headers:
  197. user = {
  198. 'id': user_id
  199. }
  200. return render_template('partial/tweets-timeline.html', user = user, tweets = tweets, query = query)
  201. else:
  202. response_body = json.dumps({
  203. 'tweets': tweets,
  204. 'query': query
  205. })
  206. return Response(response_body, mimetype='application/json')
  207. @twitter_app.route('/profile/<user_id>.html', methods=['GET'])
  208. def get_profile_html (user_id):
  209. pagination_token = request.args.get('pagination_token')
  210. #exclude_replies = request.args.get('exclude_replies', '1')
  211. tweet_source = ArchiveTweetSource(ARCHIVE_TWEETS_PATH)
  212. db_tweets = tweet_source.get_user_timeline(author_id = user_id,
  213. since_id = pagination_token,
  214. #exclude_replies = exclude_replies == '1'
  215. )
  216. tweets = list(map(tweet_model, db_tweets))
  217. next_token = db_tweets[-1]['id']
  218. query = {}
  219. if next_token:
  220. query = {
  221. **query,
  222. 'next_data_url': url_for('.get_data_timeline_user', user_id=user_id, pagination_token=next_token, exclude_replies=1),
  223. 'next_page_url': url_for('.get_profile_html', user_id=user_id , pagination_token=next_token)
  224. }
  225. profile_user = {
  226. 'id': user_id
  227. }
  228. theme = {
  229. 'name': None,
  230. 'body': {'background': 'floralwhite'},
  231. 'timeline': {'tweet': {'background': 'white',
  232. 'border': '1px solid silver'}}
  233. }
  234. return render_template('user-profile.html', user = profile_user, tweets = tweets, query = query, theme=theme)
  235. @twitter_app.get('/tweet.html')
  236. def get_tweet_html (tweet_id):
  237. return ''
  238. @twitter_app.route('/latest.html', methods=['GET'])
  239. def get_timeline_home_html (variant = "reverse_chronological", pagination_token=None):
  240. return 'ok'
  241. @twitter_app.route('/conversations.html', methods=['GET'])
  242. def get_conversations_html ():
  243. return 'ok'
  244. @twitter_app.route('/bookmarks.html', methods=['GET'])
  245. def get_bookmarks_html (user_id):
  246. return 'ok'
  247. @twitter_app.route('/logout.html', methods=['GET'])
  248. def get_logout_html ():
  249. return 'ok'
  250. @twitter_app.route('/media/upload', methods=['POST'])
  251. def post_media_upload ():
  252. return 'ok'
  253. @twitter_app.route('/tweets/search', methods=['GET'])
  254. @twitter_app.route('/tweets/search.<string:response_format>', methods=['GET'])
  255. def get_tweets_search (response_format='json'):
  256. search = request.args.get('q')
  257. limit = int(request.args.get('limit', 10))
  258. offset = int(request.args.get('offset', 0))
  259. in_reply_to_user_id = int(request.args.get('in_reply_to_user_id', 0))
  260. db = sqlite3.connect('.data/tweet.db')
  261. sql = """
  262. select
  263. id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name
  264. from tweet
  265. """
  266. sql_params = []
  267. if search:
  268. sql += " where full_text like ?"
  269. sql_params.append("%{}%".format(search))
  270. if in_reply_to_user_id:
  271. sql += " where in_reply_to_user_id = ?"
  272. sql_params.append(str(in_reply_to_user_id))
  273. if limit:
  274. sql += ' limit ?'
  275. sql_params.append(limit)
  276. if offset:
  277. sql += ' offset ?'
  278. sql_params.append(offset)
  279. cur = db.cursor()
  280. cur.row_factory = sqlite3.Row
  281. tweets = list(map(dict, cur.execute(sql, sql_params).fetchall()))
  282. cur.close()
  283. db.close()
  284. result = None
  285. if response_format == 'cards.json':
  286. cards = list(map(db_tweet_to_card, tweets))
  287. result = {
  288. "q": search,
  289. "cards": cards
  290. }
  291. elif response_format == 'rows.json':
  292. meta = tweets_form_meta_data
  293. fields = meta['fields']
  294. fields = list(map(lambda f: {**f[1], 'mapping': f[0]}, enumerate(fields)))
  295. meta = {**meta, 'fields': fields, 'id': '0'}
  296. def tweet_to_row (t):
  297. row = list(map(lambda f: t.get(f['name']), fields))
  298. return row
  299. rows = list(map(tweet_to_row, tweets))
  300. result = {
  301. "q": search,
  302. "metaData": meta,
  303. "tweets": rows
  304. }
  305. else:
  306. result = {
  307. "q": search,
  308. "tweets": tweets
  309. }
  310. return Response(json.dumps(result), mimetype="application/json")
  311. @twitter_app.route('/tweets', methods=['POST'])
  312. def post_tweets ():
  313. tweets_path = ARCHIVE_TWEETS_PATH
  314. tweets_file = open(tweets_path, 'rt', encoding='utf-8')
  315. tweets_data = json_stream.load(tweets_file)
  316. db = sqlite3.connect('.data/tweet.db')
  317. db.execute('create table tweet (id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name)')
  318. db.commit()
  319. i = 0
  320. cur = db.cursor()
  321. for tweet in tweets_data.persistent():
  322. t = dict(tweet['tweet'])
  323. dt = dateutil.parser.parse(t['created_at'])
  324. dt_utc = dt.astimezone(dateutil.tz.tz.gettz('UTC'))
  325. created_at = dt_utc.strftime('%Y-%m-%dT%H:%M:%SZ')
  326. sql = 'insert into tweet (id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name) values (?,?,?,?,?,?,?,?,?,?)'
  327. tweet_values = [
  328. t['id'],
  329. created_at,
  330. t['retweeted'],
  331. t['favorited'],
  332. t['retweet_count'],
  333. t['favorite_count'],
  334. t['full_text'],
  335. t.get('in_reply_to_status_id_str'),
  336. t.get('in_reply_to_user_id'),
  337. t.get('in_reply_to_screen_name')
  338. ]
  339. cur.execute(sql, tweet_values)
  340. i += 1
  341. if i % 100 == 0:
  342. cur.connection.commit()
  343. cur = db.cursor()
  344. cur.connection.commit()
  345. cur.close()
  346. db.close()
  347. tweets_file.close()
  348. # ---------------------------------------------------------------------------------------------------------
  349. # ---------------------------------------------------------------------------------------------------------
  350. def tweet_to_card (tweet, includes):
  351. user = list(filter(lambda u: u.get('id') == tweet['author_id'], includes.get('users')))[0]
  352. tweet_url = 'https://twitter.com/{}/status/{}'.format(user['username'], tweet['id'])
  353. content = tweet['text'] + "\n\n[view tweet]({})".format(tweet_url)
  354. card = {
  355. 'id': 'tweet-' + tweet['id'],
  356. 'content': content,
  357. 'content_type': 'text/markdown',
  358. 'created_at': tweet['created_at'], # can be derived from oldest in edit_history_tweet_ids
  359. 'modified_at': None, # can be derived from newest in edit_history_tweet_ids
  360. 'title': '@' + user['username'] + ' at ' + tweet['created_at'],
  361. 'content_source': tweet_url,
  362. #'tweet': tweet,
  363. #'user': user
  364. }
  365. return card
  366. def response_to_cards (response_json, add_included = True):
  367. tweets = response_json.get('data')
  368. includes = response_json.get('includes')
  369. cards = list(map(lambda t: tweet_to_card(t, includes), tweets))
  370. if add_included:
  371. included_cards = list(map(lambda t: tweet_to_card(t, includes), includes.get('tweets')))
  372. cards += included_cards
  373. return cards