# item_collections.py

from ulid import ULID
from dataclasses import asdict, replace
from importlib.util import find_spec
import os
import json

from flask import request, g, jsonify, render_template, Blueprint, url_for, session

from twitter_v2.api import ApiV2TweetSource

from . import view_model as h_vm
from .view_model import FeedItem, CollectionPage, cleandict
from .content_system import get_content, get_all_content, register_content_source

twitter_enabled = False
if find_spec('twitter_v2_facade'):
    from twitter_v2_facade.view_model import tweet_model_dc_vm
    twitter_enabled = True

youtube_enabled = False
if find_spec('youtube_v3_facade'):
    from youtube_v3_facade.content_source import youtube_model, get_youtube_builder
    youtube_enabled = True

DATA_DIR = ".data"

item_collections_bp = Blueprint('item_collections', 'item_collections',
                                static_folder='static',
                                static_url_path='',
                                url_prefix='/')

def get_tweet_collection (collection_id):
    json_path = f'{DATA_DIR}/collection/{collection_id}.json'
    if not os.path.exists(json_path):
        return

    with open(json_path, 'rt', encoding='utf-8') as f:
        collection = json.loads(f.read())

    return collection
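
# A rough sketch of the on-disk collection document this module reads and
# writes (inferred from the code below; the field values are illustrative):
#
#   {
#     "_version": "01H9ZZZZZZZZZZZZZZZZZZZZZZ",
#     "authorized_users": ["twitter:1234"],
#     "items": [
#       {"id": "1460323737035677698", "note": "optional note shown with the tweet"},
#       {"yt_id": "dQw4w9WgXcQ", "note": "optional note shown with the video"}
#     ]
#   }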

def collection_from_card_source (url):
    r"""
    temp1 = await fetch('http://localhost:5000/notes/cards/search?q=twitter.com/&limit=10').then(r => r.json())

    re = /(http[^\s]+twitter\.com\/[^\/]+\/status\/[\d]+)/ig
    tweetLinks = temp1.cards.map(c => c.card.content).map(c => c.match(re))
    tweetLinks2 = tweetLinks.flat().filter(l => l)
    tweetLinksS = Array.from(new Set(tweetLinks2))

    statusUrls = tweetLinksS.map(s => new URL(s))
    //users = Array.from(new Set(statusUrls.map(s => s.pathname.split('/')[1])))
    ids = Array.from(new Set(statusUrls.map(s => parseInt(s.pathname.split('/')[3]))))
    """

    r"""
    temp1 = JSON.parse(document.body.innerText)

    // get swipe note + created_at + tweet user + tweet ID
    tweetCards = temp1.cards.map(c => c.card).filter(c => c.content.match(re))
    tweets = tweetCards.map(c => ({created_at: c.created_at, content: c.content, tweets: c.content.match(re).map(m => new URL(m))}))
    tweets.filter(t => t.tweets.filter(t2 => t2.user.toLowerCase() == 'stephenmpinto').length)

    // HN
    re = /(http[^\s]+news.ycombinator\.com\/[^\s]+\=[\d]+)/ig
    linkCards = temp1.cards.map(c => c.card).filter(c => c.content.match(re))
    links = linkCards.map(c => ({created_at: c.created_at, content: c.content, links: c.content.match(re).map(m => new URL(m))}))

    // YT (I think I've already done this one)
    """

    # more in 2022 twitter report

    return None

def expand_item (item, me, tweets = None, includes = None, yt_videos = None):
    if 'id' in item:
        t = list(filter(lambda t: item['id'] == t.id, tweets))
        if not len(t):
            print("no tweet for item: " + item['id'])

            feed_item = FeedItem(
                id = item['id'],
                text = "(Deleted, suspended or blocked)",
                created_at = "",
                handle = "error",
                display_name = "Error"
            )
            # FIXME 1) put this in relative order to the collection
            # FIXME 2) we can use the tweet link to get the user ID...
        else:
            t = t[0]
            feed_item = tweet_model_dc_vm(includes, t, me)

            note = item.get('note')
            feed_item = replace(feed_item, note = note)

    elif 'yt_id' in item:
        yt_id = item['yt_id']

        vid = list(filter(lambda v: v['id'] == yt_id, yt_videos))[0]
        feed_item = youtube_model(vid)

        note = item.get('note')
        feed_item.update({'note': note})

    return feed_item

# pagination token is the index of the next item
@item_collections_bp.get('/collection/<collection_id>.html')
def get_collection_html (collection_id):
    me = request.args.get('me')
    acct = session.get(me)

    max_results = int(request.args.get('max_results', 10))
    pagination_token = int(request.args.get('pagination_token', 0))

    collection = get_tweet_collection(collection_id)
    if not collection:
        return 'collection not found', 404

    if 'authorized_users' in collection and (not acct or me not in collection['authorized_users']):
        return 'access denied.', 403

    items = collection['items'][pagination_token:(pagination_token + max_results)]

    if not len(items):
        return 'no tweets', 404
    twitter_token = os.environ.get('BEARER_TOKEN')
    if me and me.startswith('twitter:') and acct:
        twitter_token = acct['access_token']

    tweet_source = ApiV2TweetSource(twitter_token)

    tweet_ids = filter(lambda i: 'id' in i, items)
    tweet_ids = list(map(lambda item: item['id'], tweet_ids))

    tweets_response = tweet_source.get_tweets( tweet_ids, return_dataclass=True )

    yt_ids = filter(lambda i: 'yt_id' in i, items)
    yt_ids = list(map(lambda item: item['yt_id'], yt_ids))

    # only hit the YouTube API when this page actually contains YouTube items
    # (and the youtube_v3_facade module is available)
    videos_response = {'items': []}
    if yt_ids and youtube_enabled:
        youtube = get_youtube_builder()
        videos_response = youtube.videos().list(id=','.join(yt_ids), part='snippet,contentDetails,liveStreamingDetails,statistics,recordingDetails', maxResults=1).execute()

    #print(response_json)
    if tweets_response.errors:
        # types:
        # https://api.twitter.com/2/problems/not-authorized-for-resource (blocked or suspended)
        # https://api.twitter.com/2/problems/resource-not-found (deleted)
        #print(response_json.get('errors'))
        for err in tweets_response.errors:
            if 'type' not in err:
                print('unknown error type: ' + str(err))
            elif err['type'] == 'https://api.twitter.com/2/problems/not-authorized-for-resource':
                print('blocked or suspended tweet: ' + err['value'])
            elif err['type'] == 'https://api.twitter.com/2/problems/resource-not-found':
                print('deleted tweet: ' + err['value'])
            else:
                print('unknown error')
                print(json.dumps(err, indent=2))

    includes = tweets_response.includes
    tweets = tweets_response.data

    feed_items = list(map(lambda item: expand_item(item, me, tweets, includes, videos_response['items']), items))
    if request.args.get('format') == 'json':
        return jsonify({'ids': tweet_ids,
                        'tweets': cleandict(asdict(tweets_response)),
                        'feed_items': feed_items,
                        'items': items,
                        'pagination_token': pagination_token})
    else:
        query = {}

        # advance the offset so the next-page links point past the current page
        next_token = pagination_token + max_results
        if len(items) == max_results:
            query['next_data_url'] = url_for('.get_collection_html', collection_id=collection_id, pagination_token=next_token)

        if 'HX-Request' in request.headers:
            return render_template('partial/tweets-timeline.html', tweets = feed_items, user = {}, query = query)
        else:
            if len(items) == max_results:
                query['next_page_url'] = url_for('.get_collection_html', collection_id=collection_id, pagination_token=next_token)

            return render_template('tweet-collection.html', tweets = feed_items, user = {}, query = query)

def get_collection_list (me = None):
    if not me:
        me = request.args.get('me')
    acct = session.get(me)

    collections = []
    with os.scandir(f'{DATA_DIR}/collection') as collections_files:
        for collection_file in collections_files:
            if not collection_file.name.endswith('.json'):
                continue

            with open(collection_file.path, 'rt', encoding='utf-8') as f:
                coll = json.load(f)

            if 'authorized_users' in coll and (not acct or me not in coll['authorized_users']):
                continue

            collection_id = collection_file.name[:-len('.json')]

            version = coll.get('_version')
            if not version: # legacy
                version = str(ULID())

            coll_info = dict(
                id = collection_id,
                _version = version,
                href = url_for('.get_collection_html', collection_id=collection_id)
            )

            collections.append(coll_info)

    return collections

@item_collections_bp.get('/collections.html')
def get_collections_html ():
    me = request.args.get('me')

    collections = get_content('collections:list', me=me)

    return jsonify(collections)

def update_collection (collection_id, new_collection, op='replace', version=None, me = None):
    path = f'{DATA_DIR}/collection/{collection_id}.json'

    existing_collection = None
    if os.path.exists(path):
        with open(path, 'rt', encoding='utf-8') as f:
            existing_collection = json.load(f)

    existing_version = existing_collection and existing_collection.get('_version')
    if existing_collection and existing_version != version:
        raise ValueError('updating with a wrong version. probably using a stale copy. fetch and retry op.')

    if op == 'insert':
        after_id = request.form.get('after_id')
        raise ValueError('not supported yet')
    elif op == 'append':
        existing_collection['items'] += new_collection['items']
        new_collection = existing_collection
    elif op == 'prepend':
        existing_collection['items'] = new_collection['items'] + existing_collection['items']
        new_collection = existing_collection

    new_version = str(ULID()) # content addressable hash of json w/o this key or similar
    new_collection['_version'] = new_version

    with open(path, 'wt', encoding='utf-8') as f:
        json.dump(new_collection, f)

    return new_version
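
# A minimal sketch of the optimistic-versioning flow update_collection expects:
# read the current '_version', send it back with the edit, and re-fetch and
# retry on a mismatch. The names coll_id and my_items are placeholders.
#
#   coll = get_tweet_collection(coll_id)
#   try:
#       new_version = update_collection(coll_id,
#                                       {'items': my_items},
#                                       op='append',
#                                       version=coll.get('_version'))
#   except ValueError:
#       ...  # stale '_version': fetch the collection again and retry the op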

@item_collections_bp.post('/collection/<collection_id>.html')
def post_collection_html (collection_id):
    me = request.args.get('me')

    op = request.form.get('op', 'replace')
    version = request.form.get('version')

    new_collection = request.form.get('collection.json')
    new_collection = json.loads(new_collection) # FIXME probably wrong

    new_version = get_content(f'collections:update:{collection_id}', new_collection,
                              op=op, version=version, me=me)

    return jsonify({'_version': new_version})
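
# A hypothetical client call for the endpoint above (assumes the 'requests'
# package; the URL, collection id and 'me' value are placeholders). The form
# fields mirror what post_collection_html reads: op, version and collection.json.
#
#   import requests
#   requests.post('http://localhost:5000/collection/my-collection.html',
#                 params={'me': 'twitter:1234'},
#                 data={'op': 'append',
#                       'version': '01H9ZZZZZZZZZZZZZZZZZZZZZZ',
#                       'collection.json': json.dumps({'items': [{'id': '1460323737035677698'}]})})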

@item_collections_bp.get('/collection/test-update/<collection_id>.html')
def get_collection_test_update_html (collection_id):
    me = None
    op = 'prepend'
    version = request.args.get('version')

    new_collection = {
        'items': [{'id': 'zzz999'}]
    }

    new_version = get_content(f'collections:update:{collection_id}',
                              new_collection,
                              version=version,
                              op=op,
                              me=me)

    return jsonify({'_version': new_version})

@item_collections_bp.post('/data/collection/create/from-cards')
def post_data_collection_create_from_cards ():
    r"""
    // create collection from search, supporting multiple Tweets per card and Tweets in multiple Cards.

    re = /(https?[a-z0-9\.\/\:]+twitter\.com\/[0-9a-z\_]+\/status\/[\d]+)/ig
    temp1 = await fetch('http://localhost:5000/notes/cards/search?q=twitter.com/').then(r => r.json())

    cardMatches = temp1.cards
        .map(cm => Object.assign({}, cm, {tweetLinks: Array.from(new Set(cm.card.content.match(re)))}))
        .filter(cm => cm.tweetLinks && cm.tweetLinks.length)
        .map(cm => Object.assign({}, cm, {tweetUrls: cm.tweetLinks.map(l => new URL(l))}))
        .map(cm => Object.assign({}, cm, {tweetInfos: cm.tweetUrls.map(u => ({user: u.pathname.split('/')[1], tweetId: u.pathname.split('/')[3]}))}));

    collectionCards = {}
    cardMatches.forEach(function (cm) {
        if (!cm.tweetLinks.length) { return; }

        cm.tweetInfos.forEach(function (ti) {
            if (!collectionCards[ti.tweetId]) {
                collectionCards[ti.tweetId] = [];
            }
            collectionCards[ti.tweetId].push(cm.card);
        })
    })

    var collectionItems = [];
    Object.entries(collectionCards).forEach(function (e) {
        var tweetId = e[0], cards = e[1];

        var note = cards.map(function (card) {
            return card.created_at + "\n\n" + card.content;
        }).join("\n\n-\n\n");

        collectionItems.push({id: tweetId, note: note, tweet_infos: cm.tweetInfos, card_infos: cards.map(c => 'card#' + c.id)});
    })
    """

    collection = {
        'items': [], # described in the JS sketch above
        'authorized_users': [g.twitter_user['id']]
    }

    return jsonify(collection)
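
# The JS sketch above produces items shaped roughly like this (one entry per
# tweet ID, with the matching cards' text joined into 'note'); the values here
# are purely illustrative:
#
#   {
#     "id": "1460323737035677698",
#     "note": "2023-01-01T00:00:00Z\n\n(card text)\n\n-\n\n2023-01-02T00:00:00Z\n\n(another card)",
#     "tweet_infos": [{"user": "someuser", "tweetId": "1460323737035677698"}],
#     "card_infos": ["card#abc123"]
#   }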

def get_item_id (obj_or_dict, id_key='id'):
    if type(obj_or_dict) == dict:
        return obj_or_dict[id_key]
    else:
        return getattr(obj_or_dict, id_key)

def expand_item2 (item, me, content_responses):
    content_id = item['id']
    content_response = content_responses[ content_id ]

    if type(content_response) == CollectionPage:
        tweets = content_response.items
    elif type(content_response) == list:
        tweets = content_response
    else:
        tweets = [content_response]

    # endswith is a hack. Really FeedItems should return a full ID with prefix.
    t = list(filter(lambda t: content_id.endswith(f':{get_item_id(t)}'), tweets))

    if not len(t):
        print("no tweet for item: " + item['id'])

        feed_item = FeedItem(
            id = item['id'],
            text = "(Deleted, suspended or blocked)",
            created_at = "",
            handle = "error",
            display_name = "Error"
        )
        # FIXME 1) put this in relative order to the collection
        # FIXME 2) we can use the tweet link to get the user ID...
    else:
        feed_item = t[0]

        note = item.get('note')
        if type(feed_item) == dict:
            feed_item.update(note = note)
        else:
            feed_item = replace(feed_item, note = note)

    return feed_item

def get_collection (collection_id, me=None, pagination_token:str = None, max_results:int = 10):
    collection = get_tweet_collection(collection_id)

    if not collection:
        return

    first_idx = int(pagination_token or 0)
    last_idx = first_idx + max_results

    items = collection['items'][first_idx:last_idx]

    content_ids = list(map(lambda item: item['id'], items))
    content_responses = get_all_content( tuple(content_ids), enable_bulk_fetch=True )

    feed_items = list(map(lambda item: expand_item2(item, me, content_responses), items))

    collection['items'] = feed_items

    if len(collection['items']) == max_results:
        collection['next_token'] = str(last_idx)

    return collection
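
# The pagination token for a collection is just the integer offset of the next
# item, serialized as a string. A small usage sketch ('my-collection' is a
# placeholder id):
#
#   page = get_collection('my-collection', max_results=10)
#   while page:
#       for feed_item in page['items']:
#           ...  # render feed_item
#       next_token = page.get('next_token')
#       if not next_token:
#           break
#       page = get_collection('my-collection', pagination_token=next_token, max_results=10)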

def register_content_sources ():
    register_content_source("collection:", get_collection, id_pattern="([^:]+)")
    register_content_source("collections:list", get_collection_list, id_pattern="")
    register_content_source("collections:update:", update_collection, id_pattern=r"([A-Za-z0-9\-\_\.]+)")
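
# A small sketch of how these registrations are meant to be consumed via the
# content system (the collection id and 'me' value are placeholders):
#
#   register_content_sources()
#
#   coll = get_content('collection:my-collection', me='twitter:1234')
#   colls = get_content('collections:list', me='twitter:1234')
#   new_version = get_content('collections:update:my-collection',
#                             {'items': [{'id': '1460323737035677698'}]},
#                             op='append',
#                             version=coll.get('_version'))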

# pagination token is the index of the next item
@item_collections_bp.get('/collection2/<collection_id>.html')
def get_collection2_html (collection_id):
    me = request.args.get('me')
    acct = session.get(me)

    max_results = int(request.args.get('limit', 1))
    pagination_token = request.args.get('pagination_token', 0)

    #collection = get_tweet_collection(collection_id)
    collection = get_content(f'collection:{collection_id}',
                             me=me,
                             pagination_token=pagination_token,
                             max_results=max_results)

    if not collection:
        return 'collection not found', 404

    if 'authorized_users' in collection and (not acct or me not in collection['authorized_users']):
        return 'access denied.', 403

    feed_items = collection['items']
    pagination_token = collection.get('next_token')

    if not len(feed_items):
        return 'no tweets', 404
    if request.args.get('format') == 'json':
        # only the expanded feed items are available on this route; the raw tweet
        # IDs and API response returned by the older collection route are not.
        return jsonify({'feed_items': feed_items,
                        'pagination_token': pagination_token})
    else:
        query = {}

        if pagination_token:
            query['next_data_url'] = url_for('.get_collection2_html', collection_id=collection_id, pagination_token=pagination_token, limit=max_results, me=me)
            query['next_page_url'] = url_for('.get_collection2_html', collection_id=collection_id, pagination_token=pagination_token, limit=max_results, me=me)

        if 'HX-Request' in request.headers:
            return render_template('partial/tweets-timeline.html', tweets = feed_items, user = {}, query = query)
        else:
            if pagination_token:
                query['next_page_url'] = url_for('.get_collection2_html', me=me, collection_id=collection_id, pagination_token=pagination_token)

            source_url = url_for('.get_collection2_html', collection_id=collection_id, _external=True)

            title = f'Collection: {collection_id} on Hogumathi'

            opengraph_info = dict(
                type = 'webpage', # threads might be article
                url = source_url,
                title = title,
                description = title,
                #image = user.profile_image_url
            )

            return render_template('tweet-collection.html', tweets = feed_items, user = {}, query = query, opengraph_info=opengraph_info)