"""Flask blueprint that serves tweets from a locally stored Twitter archive.

The blueprint exposes static JSON dumps, a SQLite-backed search endpoint and
HTML views rendered from ``ArchiveTweetSource``.
"""

from typing import List
from dacite import from_dict
from configparser import ConfigParser
import base64
from flask import Flask, json, Response, render_template, request, send_from_directory, Blueprint, url_for, g, jsonify
from flask_cors import CORS
import sqlite3
import os
# NOTE: this stdlib import must stay *after* the flask import above so that
# the module-level name ``json`` refers to the standard library module.
import json
import json_stream
from zipfile import ZipFile
import itertools
import datetime
import dateutil
import dateutil.parser
import dateutil.tz
import requests

from tweet_source import ArchiveTweetSource
from view_model import FeedItem, PublicMetrics

ARCHIVE_TWEETS_PATH = os.environ.get('ARCHIVE_TWEETS_PATH', '.data/tweets.json')

twitter_app = Blueprint('twitter_archive_facade', 'twitter_archive_facade',
                        static_folder='static',
                        static_url_path='',
                        url_prefix='/')


@twitter_app.before_request
def add_me():
    """Per-request hook; currently a no-op placeholder."""
    #if me.startswith('twitter') and me in session:
    #g.twitter_user = {'id': '0'}
    return


@twitter_app.context_processor
def inject_me():
    """Return extra template context; currently nothing is injected."""
    #return {'twitter_user': g.twitter_user}
    return {}


# ---------------------------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------------------------
# Tweet Archive and old tests
# ---------------------------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------------------------

# https://stackoverflow.com/questions/48218065/programmingerror-sqlite-objects-created-in-a-thread-can-only-be-used-in-that-sa
db = sqlite3.connect(":memory:", check_same_thread=False)
db_need_init = True

if db_need_init:
    print("Creating tweet db...")
    db.execute("create table tweet (id, created_at, content)")


def tweets_js_to_json(path, to_path):
    """Placeholder for converting the archive's JS file to JSON; does nothing yet."""
    # open JS file provided in archive and convert it to JSON
    # string manipulation should be enough
    return True


def populate_tweetsdb_from_compressed_json(db, tweets_json_path):
    """Insert every tweet found in ``tweets_json_path`` into ``db``.

    Each JSON item must provide ``id``, ``full_text_length`` and ``date``;
    ``reply`` is optional and stored as NULL when absent.  The caller is
    responsible for committing the connection.

    Returns True once all rows have been inserted.
    """
    # perf: we should find a batch size for executemany if this is too slow.
    # https://stackoverflow.com/questions/43785569/for-loop-or-executemany-python-and-sqlite3
    insert_sql = "insert into tweet (id, full_text_length, date, reply) values (?, ?, ?, ?)"

    # ``with`` guarantees the file is closed even if parsing or an insert fails.
    with open(tweets_json_path, encoding='utf-8') as ti:
        data = json_stream.load(ti)
        for tweet in data.persistent():
            reply = tweet["reply"] if "reply" in tweet else None
            values = [tweet["id"], tweet["full_text_length"], tweet["date"], reply]
            db.execute(insert_sql, values)

    return True


def print_retweets(tweets_path):
    """Print id/created_at/retweet_count for tweets with more than one retweet.

    Output is a bracketed, comma-terminated list of JSON objects written to
    stdout (note the trailing comma after the last item).

    Returns True when the whole file has been scanned.
    """
    with open(tweets_path, 'rt', encoding='utf-8') as tweets_file:
        tweets_data = json_stream.load(tweets_file)

        print('[')
        for t in tweets_data:
            tweet = t.persistent()['tweet']
            if int(tweet['retweet_count']) > 1:
                print(json.dumps({'id': tweet['id'], 'x': tweet['created_at'], 'y': tweet['retweet_count']}) + ',')
        print(']')

    return True


def tweet_to_actpub(t):
    """Identity conversion; returns ``t`` unchanged."""
    return t


@twitter_app.route('/tweets/isd', methods=['GET'])
def get_tweets_isd():
    """Serve the static ``tweets-ispoogedaily.json`` file from ``data``."""
    # simulate GraphQL conventions with REST:
    # created_at[gte]=
    # created_at[lte]=
    # author=
    # content[re]=
    # expansions=media,...
    #results = langs_con.execute("select rowid, id, created_at, content from tweet").fetchall()
    #return Response(json.dumps(results), mimetype='application/json')
    return send_from_directory('data', 'tweets-ispoogedaily.json')


@twitter_app.route('/tweets/storms', methods=['GET'])
def get_tweet_storms():
    """Serve the static ``storm-summaries-2021.json`` file from ``data``."""
    #content = open('data/storm-summaries-2021.json').read()
    #return Response(content, mimetype='application/json')
    return send_from_directory('data', 'storm-summaries-2021.json')


@twitter_app.route('/bookmarks', methods=['GET'])
def get_bookmarks():
    """Serve the static ``bookmarks-ispoogedaily.json`` file from ``data``."""
    #content = open('data/storm-summaries-2021.json').read()
    #return Response(content, mimetype='application/json')
    return send_from_directory('data', 'bookmarks-ispoogedaily.json')


@twitter_app.route('/timeline', methods=['GET'])
def get_timeline():
    """Serve the static ``timeline-minimal.json`` file from ``data``."""
    #content = open('data/storm-summaries-2021.json').read()
    #return Response(content, mimetype='application/json')
    return send_from_directory('data', 'timeline-minimal.json')


@twitter_app.route('/tweets/compressed', methods=['POST'])
def post_tweets_compressed():
    """Create and populate ``tweets.db`` from ``.data/tweet-items.json`` if it does not exist yet."""
    db_exists = os.path.exists("tweets.db")

    if not db_exists:
        db = sqlite3.connect("tweets.db")
        try:
            db.execute("create table tweet (id, full_text_length, date, reply)")
            populate_tweetsdb_from_compressed_json(db, ".data/tweet-items.json")
            db.commit()
        finally:
            # Close the connection even when the import fails part-way.
            db.close()

    #content = open('data/storm-summaries-2021.json').read()
    #return Response(content, mimetype='application/json')
    return Response("ok")


# Field metadata describing the columns returned by /tweets/search.
tweets_form_meta_data = {
    'fields': [
        {'name': 'id'},
        {'name': 'created_at', 'type': 'date'},
        {'name': 'retweeted', 'type': 'boolean'},
        {'name': 'favorited', 'type': 'boolean'},
        {'name': 'retweet_count', 'type': 'int'},
        {'name': 'favorite_count', 'type': 'int'},
        {'name': 'full_text', 'type': 'string', 'searchable': True},
        {'name': 'in_reply_to_status_id_str', 'type': 'string'},
        {'name': 'in_reply_to_user_id', 'type': 'string'},
        {'name': 'in_reply_to_screen_name', 'type': 'string'}
    ],
    'id': 'id',
    'root': 'tweets',
    'url': '/tweets/search',
    'access': ['read']
}


@twitter_app.route('/tweets/form', methods=['GET'])
def get_tweets_form():
    """Return the tweet field metadata as JSON under the ``metaData`` key."""
    response_body = {
        'metaData': tweets_form_meta_data
    }
    return Response(json.dumps(response_body), mimetype="application/json")


def db_tweet_to_card(tweet):
    """Build a card dict from a tweet row as returned by the search query.

    The author is hard-coded.  ``tweet['id']`` may be an int (SQLite integer
    column) or a string, so it is converted explicitly before concatenation.
    """
    user = {'username': 'ispoogedaily', 'id': '14520320'}

    tweet_url = 'https://twitter.com/{}/status/{}'.format(user['username'], tweet['id'])

    content = tweet['full_text'] + "\n\n[view tweet]({})".format(tweet_url)

    card = {
        'id': 'tweet-' + str(tweet['id']),
        'content': content,
        'content_type': 'text/plain',
        'created_at': tweet['created_at'],
        'modified_at': None,
        'title': '@' + user['username'] + ' at ' + tweet['created_at'],
        'content_source': tweet_url,
        #'tweet': tweet,
        #'user': user
    }

    return card


# tweetStore = new Ext.data.JsonStore({'url': 'http://localhost:5004/tweets/search.rows.json', 'autoLoad': true})

def tweet_model(tweet_data):
    """Convert an archive tweet record into a plain dict view model.

    Example input record::

        {"id": "797839193", "created_at": "2008-04-27T04:00:27", "retweeted": 0,
         "favorited": 0, "retweet_count": "0", "favorite_count": "0",
         "full_text": "Putting pizza on. Come over any time!",
         "in_reply_to_status_id_str": null, "in_reply_to_user_id": null,
         "in_reply_to_screen_name": null, "author_id": "14520320"}

    Author details are placeholders; reply and quote counts are always 0.
    Must be called where ``url_for`` can build URLs.
    """
    # retweeted_by, avi_icon_url, display_name, handle, created_at, text
    t = {
        'id': tweet_data['id'],
        'text': tweet_data['full_text'],
        'created_at': tweet_data['created_at'],
        'author_is_verified': False,

        'conversation_id': tweet_data['id'],

        'avi_icon_url': '',

        'display_name': 'Archive User',
        'handle': '!archive',

        'author_url': url_for('.get_profile_html', user_id='0'),
        'author_id': '0',

        'source_url': '!source_url',
        'source_author_url': '!source_author_url',
        #'is_edited': len(tweet_data['edit_history_tweet_ids']) > 1
    }

    t['public_metrics'] = {
        'like_count': int(tweet_data['favorite_count']),
        'retweet_count': int(tweet_data['retweet_count']),
        'reply_count': 0,
        'quote_count': 0
    }

    return t


def tweet_model_vm(tweet_data) -> FeedItem:
    """Convert an archive tweet record into a single ``FeedItem``.

    Example input record::

        {"id": "797839193", "created_at": "2008-04-27T04:00:27", "retweeted": 0,
         "favorited": 0, "retweet_count": "0", "favorite_count": "0",
         "full_text": "Putting pizza on. Come over any time!",
         "in_reply_to_status_id_str": null, "in_reply_to_user_id": null,
         "in_reply_to_screen_name": null, "author_id": "14520320"}

    Author details are placeholders; reply and quote counts are always 0.
    Must be called where ``url_for`` can build URLs.
    """
    # retweeted_by, avi_icon_url, display_name, handle, created_at, text
    t = FeedItem(
        id = tweet_data['id'],
        text = tweet_data['full_text'],
        created_at = tweet_data['created_at'],
        author_is_verified = False,

        conversation_id = tweet_data['id'],

        avi_icon_url = '',

        display_name = 'Archive User',
        handle = '!archive',

        url = url_for('.get_tweet_html', tweet_id = tweet_data['id']),

        author_url = url_for('.get_profile_html', user_id='0'),
        author_id = '0',

        source_url = '!source_url',
        source_author_url = '!source_author_url',
        #'is_edited': len(tweet_data['edit_history_tweet_ids']) > 1

        public_metrics = PublicMetrics(
            like_count = int(tweet_data['favorite_count']),
            retweet_count = int(tweet_data['retweet_count']),
            reply_count = 0,
            quote_count = 0
        )
    )

    return t


# The route must declare ``user_id`` because the view requires it and
# ``url_for('.get_profile_html', user_id=...)`` is used to build links here.
@twitter_app.route('/profile/<user_id>.html', methods=['GET'])
def get_profile_html(user_id):
    """Render one page of a user's archived timeline.

    The optional ``pagination_token`` query parameter is forwarded to the
    tweet source as ``since_id``.  Requests carrying an ``HX-Request`` header
    receive only the timeline partial; all others get the full profile page.
    """
    pagination_token = request.args.get('pagination_token')
    #exclude_replies = request.args.get('exclude_replies', '1')

    tweet_source = ArchiveTweetSource(ARCHIVE_TWEETS_PATH)

    db_tweets = tweet_source.get_user_timeline(author_id = user_id,
                                               since_id = pagination_token,
                                               #exclude_replies = exclude_replies == '1'
                                               )

    tweets = list(map(tweet_model_vm, db_tweets))

    # An empty page has no last tweet, so there is nothing to paginate from.
    next_token = db_tweets[-1]['id'] if db_tweets else None

    query = {}

    if next_token:
        next_url = url_for('.get_profile_html', user_id=user_id, pagination_token=next_token)
        query = {
            **query,

            'next_data_url': next_url,
            'next_page_url': next_url
        }

    profile_user = {
        'id': user_id
    }

    if 'HX-Request' in request.headers:
        return render_template('partial/tweets-timeline.html', user = profile_user, tweets = tweets, query = query)
    else:
        return render_template('user-profile.html', user = profile_user, tweets = tweets, query = query)


# ``tweet_id`` is declared in the first route because
# ``url_for('.get_tweet_html', tweet_id=...)`` is used to build tweet links.
@twitter_app.get('/tweet/<tweet_id>.html')
@twitter_app.get('/tweets.html')
def get_tweet_html(tweet_id = None):
    """Render one tweet (path variable) or several (``ids`` query parameter).

    ``ids`` is a comma-separated list and is only consulted when no
    ``tweet_id`` is given; a request with neither gets a 400 response.
    ``format=feed.json`` returns the view models as JSON instead of HTML.
    """
    output_format = request.args.get('format')

    if tweet_id:
        ids = [tweet_id]
    else:
        raw_ids = request.args.get('ids')
        if not raw_ids:
            return Response("missing 'ids' query parameter", status=400)
        ids = raw_ids.split(',')

    tweet_source = ArchiveTweetSource(ARCHIVE_TWEETS_PATH)

    db_tweets = tweet_source.get_tweets(ids)

    tweets = list(map(tweet_model_vm, db_tweets))
    query = {}
    profile_user = {}

    if output_format == 'feed.json':
        return jsonify(dict(
            data = tweets
        ))
    else:
        return render_template('search.html', user = profile_user, tweets = tweets, query = query)


@twitter_app.route('/latest.html', methods=['GET'])
def get_timeline_home_html(variant = "reverse_chronological", pagination_token=None):
    """Stub endpoint; always responds with ``ok``."""
    return 'ok'


@twitter_app.route('/conversations.html', methods=['GET'])
def get_conversations_html():
    """Stub endpoint; always responds with ``ok``."""
    return 'ok'


@twitter_app.route('/bookmarks.html', methods=['GET'])
def get_bookmarks_html(user_id = None):
    """Stub endpoint; always responds with ``ok``.

    ``user_id`` defaults to None because the route has no URL variable to
    supply it.
    """
    return 'ok'


@twitter_app.route('/logout.html', methods=['GET'])
def get_logout_html():
    """Stub endpoint; always responds with ``ok``."""
    return 'ok'


@twitter_app.route('/media/upload', methods=['POST'])
def post_media_upload():
    """Stub endpoint; always responds with ``ok``."""
    return 'ok'


@twitter_app.route('/tweets/search', methods=['GET'])
@twitter_app.route('/tweets/search.<response_format>', methods=['GET'])
def get_tweets_search(response_format='json'):
    """Search the ``.data/tweet.db`` SQLite database.

    Query parameters:
        q: substring matched against ``full_text`` with ``like``.
        limit: maximum number of rows (default 10000; 0 disables the limit).
        offset: number of rows to skip (default 0).
        in_reply_to_user_id: restrict to replies to this user id.

    ``response_format`` selects the output: ``cards.json``, ``rows.json``,
    ``html`` or, for anything else, a JSON list of row dicts.
    """
    search = request.args.get('q')
    # ``type=int`` falls back to the default instead of raising on bad input.
    limit = request.args.get('limit', 10000, type=int)
    offset = request.args.get('offset', 0, type=int)
    in_reply_to_user_id = request.args.get('in_reply_to_user_id', 0, type=int)

    sql = """
select
    id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name
from tweet
    """

    # Collect the filters first so they are joined by a single WHERE ... AND.
    conditions = []
    sql_params = []

    if search:
        conditions.append("full_text like ?")
        sql_params.append("%{}%".format(search))

    if in_reply_to_user_id:
        conditions.append("in_reply_to_user_id = ?")
        sql_params.append(str(in_reply_to_user_id))

    if conditions:
        sql += " where " + " and ".join(conditions)

    sql += ' order by cast(id as integer)'

    if limit:
        sql += ' limit ?'
        sql_params.append(limit)
    elif offset:
        # SQLite only accepts OFFSET after LIMIT; a negative limit means "no limit".
        sql += ' limit -1'

    if offset:
        sql += ' offset ?'
        sql_params.append(offset)

    db = sqlite3.connect('.data/tweet.db')
    try:
        db.row_factory = sqlite3.Row
        cur = db.cursor()
        try:
            tweets = list(map(dict, cur.execute(sql, sql_params).fetchall()))
        finally:
            cur.close()
    finally:
        db.close()

    result = None

    if response_format == 'cards.json':
        cards = list(map(db_tweet_to_card, tweets))

        result = {
            "q": search,
            "cards": cards
        }
    elif response_format == 'rows.json':
        meta = tweets_form_meta_data

        # Each field is annotated with its column position in the row arrays.
        fields = [{**field, 'mapping': index} for index, field in enumerate(meta['fields'])]

        meta = {**meta, 'fields': fields, 'id': '0'}

        rows = [[t.get(f['name']) for f in fields] for t in tweets]

        result = {
            "q": search,
            "metaData": meta,
            "tweets": rows
        }
    elif response_format == 'html':
        tweets = list(map(tweet_model_vm, tweets))
        query = {}
        profile_user = {}
        return render_template('search.html', user = profile_user, tweets = tweets, query = query)
    else:
        result = {
            "q": search,
            "tweets": tweets
        }

    return Response(json.dumps(result), mimetype="application/json")


@twitter_app.route('/tweets', methods=['POST'])
def post_tweets():
    """Load the archive at ``ARCHIVE_TWEETS_PATH`` into ``.data/tweet.db``.

    Creates the ``tweet`` table (so it fails if the table already exists),
    normalises ``created_at`` to ``YYYY-MM-DDTHH:MM:SSZ`` in UTC, and commits
    every 100 rows.  Responds with ``ok`` on success.
    """
    tweets_path = ARCHIVE_TWEETS_PATH

    insert_sql = 'insert into tweet (id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name) values (?,?,?,?,?,?,?,?,?,?)'

    utc = dateutil.tz.gettz('UTC')

    with open(tweets_path, 'rt', encoding='utf-8') as tweets_file:
        tweets_data = json_stream.load(tweets_file)

        db = sqlite3.connect('.data/tweet.db')
        try:
            db.execute('create table tweet (id integer, created_at, retweeted, favorited, retweet_count integer, favorite_count integer, full_text, in_reply_to_status_id_str integer, in_reply_to_user_id, in_reply_to_screen_name)')
            db.commit()

            cur = db.cursor()
            try:
                for i, tweet in enumerate(tweets_data.persistent(), start=1):
                    t = dict(tweet['tweet'])

                    dt = dateutil.parser.parse(t['created_at'])
                    dt_utc = dt.astimezone(utc)
                    created_at = dt_utc.strftime('%Y-%m-%dT%H:%M:%SZ')

                    tweet_values = [
                        t['id'],
                        created_at,
                        t['retweeted'],
                        t['favorited'],
                        t['retweet_count'],
                        t['favorite_count'],
                        t['full_text'],
                        t.get('in_reply_to_status_id_str'),
                        t.get('in_reply_to_user_id'),
                        t.get('in_reply_to_screen_name')
                    ]

                    cur.execute(insert_sql, tweet_values)

                    # Commit in batches of 100 rows.
                    if i % 100 == 0:
                        db.commit()

                db.commit()
            finally:
                cur.close()
        finally:
            db.close()

    # A Flask view must return a response; returning None raises an error.
    return Response("ok")


# ---------------------------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------------------------


def tweet_to_card(tweet, includes):
    """Build a markdown card dict for ``tweet`` using its author from ``includes``.

    ``includes['users']`` is searched for the entry whose ``id`` equals
    ``tweet['author_id']``.

    Raises:
        IndexError: if no matching user is present in ``includes``.
    """
    users = includes.get('users') or []
    user = next((u for u in users if u.get('id') == tweet['author_id']), None)
    if user is None:
        raise IndexError('no user in includes for author_id {}'.format(tweet['author_id']))

    tweet_url = 'https://twitter.com/{}/status/{}'.format(user['username'], tweet['id'])

    content = tweet['text'] + "\n\n[view tweet]({})".format(tweet_url)

    card = {
        'id': 'tweet-' + str(tweet['id']),
        'content': content,
        'content_type': 'text/markdown',
        'created_at': tweet['created_at'],  # can be derived from oldest in edit_history_tweet_ids
        'modified_at': None,  # can be derived from newest in edit_history_tweet_ids
        'title': '@' + user['username'] + ' at ' + tweet['created_at'],
        'content_source': tweet_url,
        #'tweet': tweet,
        #'user': user
    }

    return card


def response_to_cards(response_json, add_included = True):
    """Convert a response dict with ``data`` and ``includes`` into a list of cards.

    Tweets under ``data`` come first; when ``add_included`` is true the tweets
    under ``includes['tweets']`` are appended.  Missing ``data``, ``includes``
    or ``includes['tweets']`` are treated as empty.
    """
    tweets = response_json.get('data') or []
    includes = response_json.get('includes') or {}

    cards = [tweet_to_card(t, includes) for t in tweets]

    if add_included:
        cards += [tweet_to_card(t, includes) for t in includes.get('tweets') or []]

    return cards