CityApper
/
hogumathi-app


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620
							from dataclasses import asdict
from typing import List
from dacite import from_dict

from configparser import ConfigParser
import base64
from flask import Flask, json, Response, render_template, request, send_from_directory, Blueprint, url_for, g
from flask_cors import CORS
import sqlite3
import os
import json
import json_stream

from zipfile import ZipFile
import itertools

import datetime
import dateutil
import dateutil.parser
import dateutil.tz

import requests


from twitter_v2 import types as tv2_types
from twitter_v2.archive import ArchiveTweetSource

from hogumathi_app import content_system, view_model as h_vm

# Path of the archive tweets JSON file; can be overridden through the
# ARCHIVE_TWEETS_PATH environment variable.
ARCHIVE_TWEETS_PATH=os.environ.get('ARCHIVE_TWEETS_PATH', '.data/tweets.json')
# Path of the SQLite database file used by the search/import routes; can be
# overridden through the TWEET_DB_PATH environment variable.
TWEET_DB_PATH=os.environ.get('TWEET_DB_PATH', '.data/tweet.db')

# Blueprint holding every route in this module. Its name,
# 'twitter_archive_facade', is the endpoint prefix used by the url_for()
# calls below (e.g. 'twitter_archive_facade.get_tweet_html').
twitter_app = Blueprint('twitter_archive_facade', 'twitter_archive_facade',
    static_folder='static',
    static_url_path='',
    url_prefix='/')


@twitter_app.before_request
def add_me():
    """Before-request hook that is currently a no-op.

    It returns None, so request handling proceeds to the matched view.
    The commented lines are a disabled experiment kept for reference.
    """
    #if me.startswith('twitter') and me in session:
    #g.twitter_user = {'id': '0'}
    return None


@twitter_app.context_processor
def inject_me():
    """Template context processor that currently adds no variables."""
    # Disabled: would expose the current user to templates.
    #return {'twitter_user': g.twitter_user}
    extra_context = {}
    return extra_context
    
    
# ---------------------------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------------------------
#     Tweet Archive and old tests
# ---------------------------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------------------------


# Module-level in-memory SQLite connection, created at import time.
# check_same_thread=False lets threads other than the creating one use it; see
# https://stackoverflow.com/questions/48218065/programmingerror-sqlite-objects-created-in-a-thread-can-only-be-used-in-that-sa
# NOTE(review): the route handlers visible in this file open their own
# connections to TWEET_DB_PATH (binding a local `db`), so nothing shown here
# reads this connection -- confirm whether it is still needed.
db = sqlite3.connect(":memory:", check_same_thread=False)
# Always True here, so the table below is created on every import.
db_need_init = True

if db_need_init:
  print("Creating tweet db...")
  db.execute("create table tweet (id, created_at, content)")
  
  
def tweets_js_to_json(path, to_path):
    """Placeholder for converting the archive's JS tweets file to JSON.

    Not implemented yet: both arguments are ignored and True is always
    returned.
    """
    # TODO: open the JS file provided in the archive and convert it to JSON;
    # string manipulation should be enough.
    converted = True
    return converted
  
  
def populate_tweetsdb_from_compressed_json (db, tweets_json_path):
    """Insert every tweet found in a JSON file into the ``tweet`` table.

    Each streamed item must provide ``id``, ``full_text_length`` and ``date``;
    ``reply`` is optional and stored as NULL when absent.

    Args:
        db: open sqlite3 connection whose ``tweet`` table has the columns
            (id, full_text_length, date, reply).
        tweets_json_path: path of the JSON file to stream.

    Returns:
        True once all rows have been inserted. The caller is responsible for
        committing the transaction.
    """
    # perf: we should find a batch size for executemany if this is too slow.
    # https://stackoverflow.com/questions/43785569/for-loop-or-executemany-python-and-sqlite3
    insert_sql = "insert into tweet (id, full_text_length, date, reply) values (?, ?, ?, ?)"

    # The context manager closes the file even when an insert fails. The file
    # is read as UTF-8, like the other archive readers in this module.
    with open(tweets_json_path, 'rt', encoding='utf-8') as ti:
        data = json_stream.load(ti)
        for tweet in data.persistent():
            reply = tweet["reply"] if "reply" in tweet else None
            values = [tweet["id"], tweet["full_text_length"], tweet["date"], reply]
            db.execute(insert_sql, values)

    return True

def print_retweets (tweets_path):
    """Print the tweets with a retweet count above 1 as chart points.

    Streams the archive file at ``tweets_path`` and prints, between a ``[``
    and a ``]`` line, one JSON object per matching tweet with the keys
    ``id``, ``x`` (created_at) and ``y`` (retweet_count), each followed by a
    comma.

    Returns:
        True after the whole file has been processed.
    """
    # The context manager closes the file even if a record is malformed.
    with open(tweets_path, 'rt', encoding='utf-8') as tweets_file:
        tweets_data = json_stream.load(tweets_file)
        print('[')
        for t in tweets_data:
            tweet = t.persistent()['tweet']
            # Counts are converted with int() before the comparison.
            if int(tweet['retweet_count']) > 1:
                print(json.dumps({'id': tweet['id'], 'x': tweet['created_at'], 'y': tweet['retweet_count']}) + ',')
        print(']')

    return True

def tweet_to_actpub(t):
    """Placeholder conversion; currently returns ``t`` unchanged."""
    converted = t
    return converted

@twitter_app.route('/tweets/isd', methods=['GET'])
def get_tweets_isd():
    """Serve the file ``tweets-ispoogedaily.json`` from the ``data`` directory."""
    # Planned query conventions (simulating GraphQL with REST), not implemented:
    #   created_at[gte]=
    #   created_at[lte]=
    #   author=
    #   content[re]=
    #   expansions=media,...
    #results = langs_con.execute("select rowid, id, created_at, content from tweet").fetchall()
    #return Response(json.dumps(results), mimetype='application/json')
    directory = 'data'
    filename = 'tweets-ispoogedaily.json'
    return send_from_directory(directory, filename)

@twitter_app.route('/tweets/storms', methods=['GET'])
def get_tweet_storms():
    """Serve the file ``storm-summaries-2021.json`` from the ``data`` directory."""
    directory = 'data'
    filename = 'storm-summaries-2021.json'
    return send_from_directory(directory, filename)


@twitter_app.route('/bookmarks', methods=['GET'])
def get_bookmarks():
    """Serve the file ``bookmarks-ispoogedaily.json`` from the ``data`` directory."""
    directory = 'data'
    filename = 'bookmarks-ispoogedaily.json'
    return send_from_directory(directory, filename)


@twitter_app.route('/timeline', methods=['GET'])
def get_timeline():
    """Serve the file ``timeline-minimal.json`` from the ``data`` directory."""
    directory = 'data'
    filename = 'timeline-minimal.json'
    return send_from_directory(directory, filename)


@twitter_app.route('/tweets/compressed', methods=['POST'])
def post_tweets_compressed ():
    """Create and populate the compressed tweet database if it does not exist.

    When no file exists at TWEET_DB_PATH, a ``tweet`` table with the columns
    (id, full_text_length, date, reply) is created and filled from
    ``.data/tweet-items.json``. When the file already exists nothing is done.

    Returns:
        A plain "ok" response in both cases.
    """
    if not os.path.exists(TWEET_DB_PATH):
        # Named `conn` so the module-level `db` is not shadowed.
        conn = sqlite3.connect(TWEET_DB_PATH)
        try:
            conn.execute("create table tweet (id, full_text_length, date, reply)")
            populate_tweetsdb_from_compressed_json(conn, ".data/tweet-items.json")
            conn.commit()
        finally:
            # Release the connection even when the import fails part-way.
            conn.close()

    return Response("ok")


# Metadata describing the tweet search result set. It is returned as-is by
# get_tweets_form() and used by the 'rows.json' format of get_tweets_search()
# to decide the column order of each row. The field names match the columns
# selected by get_tweets_search().
tweets_form_meta_data = {
    'fields': [
        {'name': 'id'},
        {'name': 'created_at', 'type': 'date'},
        {'name': 'retweeted', 'type': 'boolean'},
        {'name': 'favorited', 'type': 'boolean'},
        {'name': 'retweet_count', 'type': 'int'},
        {'name': 'favorite_count', 'type': 'int'},
        {'name': 'full_text', 'type': 'string', 'searchable': True},
        {'name': 'in_reply_to_status_id_str', 'type': 'string'},
        {'name': 'in_reply_to_user_id', 'type': 'string'},
        {'name': 'in_reply_to_screen_name', 'type': 'string'}
        ],
    # Name of the field that identifies a record.
    'id': 'id',
    # Key under which get_tweets_search() puts the records in its response.
    'root': 'tweets',
    'url': '/tweets/search',
    'access': ['read']
}

@twitter_app.route('/tweets/form', methods=['GET'])
def get_tweets_form():
    """Return the tweet form metadata as JSON under the ``metaData`` key."""
    payload = json.dumps({'metaData': tweets_form_meta_data})
    return Response(payload, mimetype="application/json")
    
   
def db_tweet_to_card (tweet):
    """Convert a tweet row (as a dict) into a card dict.

    Args:
        tweet: mapping with at least ``id``, ``full_text`` and ``created_at``.
            ``id`` may be a string or an integer; rows read from a table
            whose ``id`` column is declared ``integer`` carry integer ids.

    Returns:
        A dict with the keys ``id``, ``content``, ``content_type``,
        ``created_at``, ``modified_at``, ``title`` and ``content_source``.
    """
    # The archive belongs to a single, hard-coded account.
    user = {'username': 'ispoogedaily', 'id': '14520320'}

    tweet_url = 'https://twitter.com/{}/status/{}'.format(user['username'], tweet['id'])

    content = tweet['full_text'] + "\n\n[view tweet]({})".format(tweet_url)

    card = {
      # format() rather than '+' so integer ids do not raise TypeError.
      'id': 'tweet-{}'.format(tweet['id']),
      'content': content,
      'content_type': 'text/plain',
      'created_at': tweet['created_at'],
      'modified_at': None,
      'title': '@' + user['username'] + ' at ' + tweet['created_at'],
      'content_source': tweet_url,
      #'tweet': tweet,
      #'user': user
    }

    return card
    
   
# tweetStore = new Ext.data.JsonStore({'url': 'http://localhost:5004/tweets/search.rows.json', 'autoLoad': true})


def tweet_model(tweet_data):
    """Build the dict view model for one archive tweet.

    ``tweet_data`` must provide ``id``, ``full_text``, ``created_at``,
    ``favorite_count`` and ``retweet_count``; the counts are converted with
    int(). Author details are fixed placeholders for the archive user.

    Example input records:
    {"id": "797839193", "created_at": "2008-04-27T04:00:27", "retweeted": 0, "favorited": 0, "retweet_count": "0", "favorite_count": "0", "full_text": "Putting pizza on. Come over any time!", "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "author_id": "14520320"}, {"id": "797849979", "created_at": "2008-04-27T04:27:46", "retweeted": 0, "favorited": 0, "retweet_count": "0", "favorite_count": "0", "full_text": "hijacked!@!!!", "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "author_id": "14520320"}
    """
    # Template fields: retweeted_by, avi_icon_url, display_name, handle,
    # created_at, text
    tweet_id = tweet_data['id']

    return {
        'id': tweet_id,
        'text': tweet_data['full_text'],
        'created_at': tweet_data['created_at'],
        'author_is_verified': False,
        # The tweet's own id doubles as its conversation id.
        'conversation_id': tweet_id,
        'avi_icon_url': '',
        'display_name': 'Archive User',
        'handle': '!archive',
        'author_url': url_for('.get_profile_html', user_id='0'),
        'author_id': '0',
        'source_url': '!source_url',
        'source_author_url': '!source_author_url',
        #'is_edited': len(tweet_data['edit_history_tweet_ids']) > 1
        'public_metrics': {
            'like_count': int(tweet_data['favorite_count']),
            'retweet_count': int(tweet_data['retweet_count']),
            'reply_count': 0,
            'quote_count': 0,
        },
    }


def tweet_model_vm (tweet_data) -> h_vm.FeedItem:
    """Build a ``FeedItem`` view model for one archive tweet.

    The return annotation names the single ``FeedItem`` that is returned;
    callers map this function over a list of tweets themselves.

    Args:
        tweet_data: mapping providing ``id``, ``full_text``, ``created_at``,
            ``favorite_count`` and ``retweet_count`` (counts are converted
            with int()).

    Example input records:
    {"id": "797839193", "created_at": "2008-04-27T04:00:27", "retweeted": 0, "favorited": 0, "retweet_count": "0", "favorite_count": "0", "full_text": "Putting pizza on. Come over any time!", "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "author_id": "14520320"}, {"id": "797849979", "created_at": "2008-04-27T04:27:46", "retweeted": 0, "favorited": 0, "retweet_count": "0", "favorite_count": "0", "full_text": "hijacked!@!!!", "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_screen_name": null, "author_id": "14520320"}
    """
    # Template fields: retweeted_by, avi_icon_url, display_name, handle,
    # created_at, text
    t = h_vm.FeedItem(
        id = tweet_data['id'],
        text = tweet_data['full_text'],
        created_at = tweet_data['created_at'],
        author_is_verified = False,

        # The tweet's own id doubles as its conversation id.
        conversation_id = tweet_data['id'],

        avi_icon_url = '',

        # Author details are fixed placeholders for the archive user.
        display_name = 'Archive User',
        handle = '!archive',

        url = url_for('twitter_archive_facade.get_tweet_html', tweet_id = tweet_data['id']),

        author_url = url_for('twitter_archive_facade.get_profile_html', user_id='0'),
        author_id = '0',

        source_url = '!source_url',
        source_author_url = '!source_author_url',
        #'is_edited': len(tweet_data['edit_history_tweet_ids']) > 1

        public_metrics = h_vm.PublicMetrics(
            like_count = int(tweet_data['favorite_count']),
            retweet_count = int(tweet_data['retweet_count']),
            reply_count = 0,
            quote_count = 0
        )
    )

    return t
    
    
def get_user_feed (user_id, pagination_token=None, me=None):
    """Return one page of a user's timeline from the tweet archive.

    Args:
        user_id: author id passed to the archive source.
        pagination_token: id passed to the source as ``since_id``.
        me: unused by this function.

    Returns:
        An ``h_vm.CollectionPage`` whose ``next_token`` is the id of the last
        tweet on the page, or None when the page is empty.
    """
    tweet_source = ArchiveTweetSource(ARCHIVE_TWEETS_PATH)

    db_tweets = tweet_source.get_user_timeline(author_id = user_id,
                                                    since_id = pagination_token,
                                                    #exclude_replies = exclude_replies == '1'
                                                    )

    tweets = list(map(tweet_model_vm, db_tweets))

    # An empty page has no last tweet; indexing [-1] would raise IndexError.
    next_token = db_tweets[-1]['id'] if db_tweets else None

    collection_page = h_vm.CollectionPage(
        id = user_id,
        items = tweets,
        next_token = next_token
    )

    return collection_page

def get_tweets(ids, me=None):
    """Look up the given tweet ids in the archive and wrap them in a page.

    The page id is the comma-joined ``ids``; ``me`` is not used.
    """
    source = ArchiveTweetSource(ARCHIVE_TWEETS_PATH)
    feed_items = [tweet_model_vm(row) for row in source.get_tweets(ids)]
    page_id = ','.join(ids)

    return h_vm.CollectionPage(id=page_id, items=feed_items)

def get_tweet(tweet_id, me=None):
    """Return the first item found for ``tweet_id``, or None when there is none."""
    page = get_tweets([tweet_id], me=me)

    if not page.items:
        return None

    return page.items[0]


def register_content_sources ():
    """Register this module's feed and tweet lookups with the content system."""
    # Raw strings keep the regex backslash literal; '\d' in a normal string
    # is an invalid escape sequence that newer Python versions warn about.
    content_system.register_content_source('twitter:feed:user:', get_user_feed, id_pattern=r'([\d]+)')

    content_system.register_content_source('twitter:tweets', get_tweets, id_pattern='')
    content_system.register_content_source('twitter:tweet:', get_tweet, id_pattern=r'([\d]+)')

@twitter_app.route('/profile/<user_id>.html', methods=['GET'])
def get_profile_html(user_id):
    """Render one page of a user's archived timeline.

    Requests carrying an ``HX-Request`` header receive only the timeline
    partial; all other requests receive the full profile page.
    """
    page_token = request.args.get('pagination_token')
    #exclude_replies = request.args.get('exclude_replies', '1')

    # FIXME we want to use a specific source here...
    page = content_system.get_content(
        f'twitter:feed:user:{user_id}',
        content_source_id = 'twitter_archive_facade.facade:get_user_feed',
        pagination_token = page_token)

    query = {}
    if page.next_token:
        # Both links point at the same next-page URL.
        next_url = url_for('twitter_archive_facade.get_profile_html', user_id=user_id, pagination_token=page.next_token)
        query['next_data_url'] = next_url
        query['next_page_url'] = next_url

    profile_user = {'id': user_id}

    if 'HX-Request' in request.headers:
        template_name = 'partial/tweets-timeline.html'
    else:
        template_name = 'user-profile.html'

    return render_template(template_name, user = profile_user, tweets = page.items, query = query)

@twitter_app.get('/tweet/<tweet_id>.html')
@twitter_app.get('/tweets.html')
def get_tweet_html (tweet_id = None):
    """Render one tweet, or a comma-separated list of tweets given in ``ids``.

    Query parameters:
        ids: comma-separated tweet ids; required when the URL carries no
            ``tweet_id``.
        format: ``feed.json`` returns the tweets as JSON under ``data``;
            anything else renders ``search.html``.

    Returns:
        The rendered page or JSON document, or a 400 response when neither a
        ``tweet_id`` nor an ``ids`` parameter was supplied.
    """
    # `jsonify` is not part of the module-level flask import, so it is
    # imported here to keep this view self-contained.
    from flask import jsonify

    output_format = request.args.get('format')

    if not tweet_id:
        ids_param = request.args.get('ids')
        if not ids_param:
            # Previously this path failed with AttributeError on None.
            return Response("missing 'ids' query parameter", status=400)

        ids = ids_param.split(',')

        collection_page = content_system.get_content('twitter:tweets', ids=ids, content_source_id='twitter_archive_facade.facade:get_tweets')

        tweets = collection_page.items
    else:
        tweet = content_system.get_content(f'twitter:tweet:{tweet_id}', content_source_id='twitter_archive_facade.facade:get_tweet')

        # get_tweet() returns None for an unknown id; don't pass [None] on.
        tweets = [tweet] if tweet is not None else []

    query = {}
    profile_user = {}

    if output_format == 'feed.json':
        return jsonify(dict(
            data = tweets
            ))
    else:
        return render_template('search.html', user = profile_user, tweets = tweets, query = query)

@twitter_app.route('/latest.html', methods=['GET'])
def get_timeline_home_html(variant="reverse_chronological", pagination_token=None):
    """Stub for the home timeline page; ignores its arguments and returns 'ok'."""
    body = 'ok'
    return body
    
    
@twitter_app.route('/conversations.html', methods=['GET'])
def get_conversations_html():
    """Stub for the conversations page; always returns 'ok'."""
    body = 'ok'
    return body

@twitter_app.route('/bookmarks.html', methods=['GET'])
def get_bookmarks_html (user_id = None):
    """Stub for the bookmarks page; always returns 'ok'.

    The route rule has no ``user_id`` variable, so Flask calls the view with
    no arguments. The parameter therefore needs a default; without one every
    request raised TypeError.
    """
    return 'ok'

@twitter_app.route('/logout.html', methods=['GET'])
def get_logout_html():
    """Stub for the logout page; always returns 'ok'."""
    body = 'ok'
    return body
    
@twitter_app.route('/media/upload', methods=['POST'])
def post_media_upload():
    """Stub for media upload; always returns 'ok'."""
    body = 'ok'
    return body


def _build_tweets_search_sql (search, in_reply_to_user_id, limit, offset):
    """Return ``(sql, params)`` for the tweet search with the given filters."""
    sql = """
select
id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name
from tweet
    """

    conditions = []
    sql_params = []

    if search:
        conditions.append("full_text like ?")
        sql_params.append("%{}%".format(search))

    if in_reply_to_user_id:
        conditions.append("in_reply_to_user_id = ?")
        sql_params.append(str(in_reply_to_user_id))

    # A statement may contain only one WHERE; combine the filters with AND.
    if conditions:
        sql += " where " + " and ".join(conditions)

    sql += ' order by cast(id as integer)'

    if limit or offset:
        # SQLite only accepts OFFSET after LIMIT; a negative LIMIT means
        # "no upper bound", which keeps offset-only requests valid.
        sql += ' limit ?'
        sql_params.append(limit if limit else -1)

    if offset:
        sql += ' offset ?'
        sql_params.append(offset)

    return sql, sql_params


@twitter_app.route('/tweets/search', methods=['GET'])
@twitter_app.route('/tweets/search.<string:response_format>', methods=['GET'])
def get_tweets_search (response_format='json'):
    """Search the tweet database and return the matches in several formats.

    Query parameters:
        q: substring to look for in ``full_text`` (SQL LIKE).
        limit: maximum number of rows (default 10000; 0 disables the limit).
        offset: number of rows to skip (default 0).
        in_reply_to_user_id: only return replies to this user id.
        Non-numeric values for the numeric parameters fall back to their
        defaults.

    Args:
        response_format: ``cards.json`` (card dicts), ``rows.json`` (rows as
            positional lists plus metadata), ``html`` (rendered
            ``search.html``); anything else returns the rows as JSON objects.

    Returns:
        A JSON response, or rendered HTML for the ``html`` format.
    """
    search = request.args.get('q')
    # type=int makes the lookup return the default on unparsable input.
    limit = request.args.get('limit', 10000, type=int)
    offset = request.args.get('offset', 0, type=int)
    in_reply_to_user_id = request.args.get('in_reply_to_user_id', 0, type=int)

    sql, sql_params = _build_tweets_search_sql(search, in_reply_to_user_id, limit, offset)

    conn = sqlite3.connect(TWEET_DB_PATH)
    try:
        cur = conn.cursor()
        # Row objects can be converted to dicts keyed by column name.
        cur.row_factory = sqlite3.Row
        tweets = list(map(dict, cur.execute(sql, sql_params).fetchall()))
        cur.close()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    if response_format == 'html':
        feed_items = list(map(tweet_model_vm, tweets))
        return render_template('search.html', user = {}, tweets = feed_items, query = {})

    if response_format == 'cards.json':
        result = {
            "q": search,
            "cards": list(map(db_tweet_to_card, tweets))
        }
    elif response_format == 'rows.json':
        # Each field gets a 'mapping' equal to its position in the row list.
        fields = [{**field, 'mapping': index} for index, field in enumerate(tweets_form_meta_data['fields'])]
        meta = {**tweets_form_meta_data, 'fields': fields, 'id': '0'}

        rows = [[t.get(field['name']) for field in fields] for t in tweets]

        result = {
            "q": search,
            "metaData": meta,
            "tweets": rows
        }
    else:
        result = {
            "q": search,
            "tweets": tweets
        }

    return Response(json.dumps(result), mimetype="application/json")

@twitter_app.route('/tweets', methods=['POST'])
def post_tweets ():
    """Import the archive tweets file into the ``tweet`` table.

    Streams ARCHIVE_TWEETS_PATH, creates the ``tweet`` table in the database
    at TWEET_DB_PATH and inserts one row per tweet, committing every 100
    rows. ``created_at`` is parsed, converted to UTC and stored as
    ``YYYY-MM-DDTHH:MM:SSZ``.

    Returns:
        A plain "ok" response. Returning nothing, as this view used to,
        makes Flask reject the request because None is not a valid response.

    Raises:
        sqlite3.OperationalError: if the ``tweet`` table already exists.
    """
    insert_sql = 'insert into tweet (id, created_at, retweeted, favorited, retweet_count, favorite_count, full_text, in_reply_to_status_id_str, in_reply_to_user_id, in_reply_to_screen_name) values (?,?,?,?,?,?,?,?,?,?)'

    # Loop-invariant, so resolved once.
    utc = dateutil.tz.tz.gettz('UTC')

    # The file is opened before the database is touched, as before, so a
    # missing archive does not leave an empty table behind.
    with open(ARCHIVE_TWEETS_PATH, 'rt', encoding='utf-8') as tweets_file:
        tweets_data = json_stream.load(tweets_file)

        conn = sqlite3.connect(TWEET_DB_PATH)
        try:
            conn.execute('create table tweet (id integer, created_at, retweeted, favorited, retweet_count integer, favorite_count integer, full_text, in_reply_to_status_id_str integer, in_reply_to_user_id, in_reply_to_screen_name)')
            conn.commit()

            cur = conn.cursor()
            for count, tweet in enumerate(tweets_data.persistent(), start=1):
                t = dict(tweet['tweet'])

                dt = dateutil.parser.parse(t['created_at'])
                created_at = dt.astimezone(utc).strftime('%Y-%m-%dT%H:%M:%SZ')

                tweet_values = [
                    t['id'],
                    created_at,
                    t['retweeted'],
                    t['favorited'],
                    t['retweet_count'],
                    t['favorite_count'],
                    t['full_text'],

                    # Reply fields are absent on tweets that are not replies.
                    t.get('in_reply_to_status_id_str'),
                    t.get('in_reply_to_user_id'),
                    t.get('in_reply_to_screen_name')
                ]

                cur.execute(insert_sql, tweet_values)

                # Commit in batches of 100 rows.
                if count % 100 == 0:
                    conn.commit()

            conn.commit()
            cur.close()
        finally:
            # Release the connection even when the import fails part-way.
            conn.close()

    return Response("ok")

# ---------------------------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------------------------


def tweet_to_card (tweet, includes):
    """Convert a tweet and its expansions into a card dict.

    Args:
        tweet: a dict, or an ``h_vm.FeedItem`` (converted with ``asdict``),
            providing ``id``, ``author_id``, ``text`` and ``created_at``.
        includes: a dict, or a ``tv2_types.TweetExpansions`` (converted with
            ``asdict``), whose ``users`` list contains the tweet's author.

    Returns:
        A dict with the keys ``id``, ``content``, ``content_type``,
        ``created_at``, ``modified_at``, ``title`` and ``content_source``.

    Raises:
        IndexError: if the author is not present in ``includes['users']``.
    """
    # NOTE(review): this block used to reference `t_vm`, which is not
    # imported in the visible part of this file and raised NameError on every
    # call. The view model module is imported as `h_vm` and provides
    # `FeedItem`; `cleandict` is assumed to live there too -- confirm.
    if isinstance(tweet, h_vm.FeedItem):
        tweet = asdict(tweet)

    if isinstance(includes, tv2_types.TweetExpansions):
        includes = h_vm.cleandict(asdict(includes))

    author_id = tweet['author_id']
    matching_users = [u for u in includes.get('users') or [] if u.get('id') == author_id]
    if not matching_users:
        raise IndexError('author {} not found in includes'.format(author_id))
    user = matching_users[0]

    tweet_url = 'https://twitter.com/{}/status/{}'.format(user['username'], tweet['id'])

    content = tweet['text'] + "\n\n[view tweet]({})".format(tweet_url)

    card = {
      'id': 'tweet-{}'.format(tweet['id']),
      'content': content,
      'content_type': 'text/markdown',
      'created_at': tweet['created_at'], # can be derived from oldest in edit_history_tweet_ids
      'modified_at': None, # can be derived from newest in edit_history_tweet_ids
      'title': '@' + user['username'] + ' at ' + tweet['created_at'],
      'content_source': tweet_url,
      #'tweet': tweet,
      #'user': user
    }

    return card


def response_to_cards (response_json, add_included = True):
    """Convert an API-style response into a list of cards.

    Seems to be unused.

    Args:
        response_json: mapping with an optional ``data`` list of tweets and
            an optional ``includes`` mapping (``users``, ``tweets``).
        add_included: when true, the tweets under ``includes['tweets']`` are
            converted and appended after the main ones.

    Returns:
        A list of card dicts; empty when the response carries no tweets.
    """
    # Missing or null sections are treated as empty instead of crashing.
    tweets = response_json.get('data') or []
    includes = response_json.get('includes') or {}

    cards = [tweet_to_card(t, includes) for t in tweets]

    if add_included:
        included_tweets = includes.get('tweets') or []
        cards += [tweet_to_card(t, includes) for t in included_tweets]

    return cards