test_content_system.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. from typing import List, Dict
  2. import pytest
  3. from hogumathi_app.content_system import ContentSystem
  4. def tweet_cs (tweet_id:str) -> object:
  5. print(f'tweet_cs: {tweet_id}')
  6. if tweet_id == '1':
  7. return {'text': 'one', 'id': '1'}
  8. def tweets_cs (content_ids:List[str]) -> Dict[str,object]:
  9. print(f'tweets_cs: {content_ids}')
  10. if ','.join(content_ids) == 'twitter:tweet:1,twitter:tweet:2':
  11. return {
  12. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  13. 'twitter:tweet:2': {'text': 'two', 'id': '2'}
  14. }
  15. elif ','.join(content_ids) == 'twitter:tweet:2':
  16. return {
  17. 'twitter:tweet:2': {'text': 'two', 'id': '2'}
  18. }
  19. def video_cs (vid_id:str) -> object:
  20. print(f'video_cs: {vid_id}')
  21. if vid_id == '3':
  22. return {'url': 'three.mp4', 'id': '3'}
  23. def videos_cs (content_ids:List[str]) -> Dict[str, object]:
  24. print(f'videos_cs: {content_ids}')
  25. if ','.join(content_ids) == 'youtube:video:3,youtube:video:4':
  26. return {
  27. 'youtube:video:3': {'url': 'three.mp4', 'id': '3'},
  28. 'youtube:video:4': {'url': 'four.mp4', 'id': '4'}
  29. }
  30. def photo_cs (post_id:str) -> object:
  31. print(f'photo_cs: {post_id}')
  32. if post_id == '3':
  33. return {'url': 'three.png', 'id': '3'}
  34. def test_content_system ():
  35. h_cs = ContentSystem()
  36. h_cs.register_content_source('twitter:tweet:', tweet_cs)
  37. h_cs.register_content_source('twitter:tweets', tweets_cs)
  38. t1 = h_cs.get_content('twitter:tweet:1')
  39. t2 = h_cs.get_content('twitter:tweet:2')
  40. t3 = h_cs.get_content('fake:1')
  41. assert(t1 == {'text': 'one', 'id': '1'})
  42. assert(t2 == None)
  43. assert(t3 == None)
  44. t_multi = h_cs.get_content('twitter:tweets', content_ids=['twitter:tweet:1', 'twitter:tweet:2'])
  45. assert(t_multi == {
  46. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  47. 'twitter:tweet:2': {'text': 'two', 'id': '2'}
  48. })
  49. def test_bulk_content ():
  50. h_cs = ContentSystem()
  51. h_cs.register_content_source('twitter:tweet:', tweet_cs)
  52. h_cs.register_content_source('twitter:tweets', tweets_cs, id_pattern='')
  53. tweets = h_cs.get_all_content(['twitter:tweet:1', 'twitter:tweet:2', 'fake:1'], enable_bulk_fetch=True)
  54. assert(tweets == {
  55. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  56. 'twitter:tweet:2': {'text': 'two', 'id': '2'},
  57. 'fake:1': None
  58. })
  59. @pytest.mark.xfail(reason="need to rethink get_all_content code for this.")
  60. def test_bulk_content_partial_miss ():
  61. """
  62. xfail: note in ContentSystem.get_content
  63. """
  64. h_cs = ContentSystem()
  65. def tweets_cache_cs (content_ids):
  66. if ','.join(content_ids) == 'twitter:tweet:1,twitter:tweet:2':
  67. return {
  68. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  69. 'twitter:tweet:2': None
  70. }
  71. h_cs.register_content_source('twitter:tweet:', tweet_cs)
  72. h_cs.register_content_source('twitter:tweets', tweets_cs, id_pattern='')
  73. h_cs.register_content_source('', tweets_cache_cs, id_pattern='(.+)', weight=99999)
  74. tweets = h_cs.get_all_content(['twitter:tweet:1', 'twitter:tweet:2', 'fake:1'], enable_bulk_fetch=True)
  75. assert(tweets == {
  76. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  77. 'twitter:tweet:2': {'text': 'two', 'id': '2'},
  78. 'fake:1': None
  79. })
  80. def test_hooks_bulk_content ():
  81. h_cs = ContentSystem()
  82. h_cs.register_content_source('twitter:tweet:', tweet_cs)
  83. h_cs.register_content_source('twitter:tweets', tweets_cs, id_pattern='')
  84. hooked_content = []
  85. def got_content(content_id, content):
  86. hooked_content.append([content_id, content])
  87. h_cs.register_hook('got_content', got_content)
  88. content = h_cs.get_all_content(['twitter:tweet:1', 'twitter:tweet:2', 'fake:1'], enable_bulk_fetch=True)
  89. assert(content == {
  90. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  91. 'twitter:tweet:2': {'text': 'two', 'id': '2'},
  92. 'fake:1': None
  93. })
  94. print(f'hooked_content: {hooked_content}')
  95. assert(hooked_content == [
  96. ['twitter:tweets', {
  97. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  98. 'twitter:tweet:2': {'text': 'two', 'id': '2'}
  99. }],
  100. ['twitter:tweet:1', {'text': 'one', 'id': '1'}],
  101. ['twitter:tweet:2', {'text': 'two', 'id': '2'}]
  102. ])
  103. def test_hooks_bulk_content_multi_bulk ():
  104. h_cs = ContentSystem()
  105. h_cs.register_content_source('twitter:tweet:', tweet_cs)
  106. h_cs.register_content_source('twitter:tweets', tweets_cs, id_pattern='')
  107. h_cs.register_content_source('youtube:video:', video_cs)
  108. h_cs.register_content_source('youtube:videos', videos_cs, id_pattern='')
  109. h_cs.register_content_source('instagram:post:', photo_cs)
  110. hooked_content = []
  111. def got_content(content_id, content):
  112. hooked_content.append([content_id, content])
  113. h_cs.register_hook('got_content', got_content)
  114. content = h_cs.get_all_content(['twitter:tweet:1', 'twitter:tweet:2',
  115. 'youtube:video:3', 'youtube:video:4',
  116. 'instagram:post:3',
  117. 'fake:1'], enable_bulk_fetch=True)
  118. assert(content == {
  119. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  120. 'twitter:tweet:2': {'text': 'two', 'id': '2'},
  121. 'youtube:video:3': {'url': 'three.mp4', 'id': '3'},
  122. 'youtube:video:4': {'url': 'four.mp4', 'id': '4'},
  123. 'instagram:post:3': {'url': 'three.png', 'id': '3'},
  124. 'fake:1': None
  125. })
  126. print(f'hooked_content: {hooked_content}')
  127. assert(hooked_content == [
  128. ['instagram:post:3', {'url': 'three.png', 'id': '3'}],
  129. ['twitter:tweets', {
  130. 'twitter:tweet:1': {'text': 'one', 'id': '1'},
  131. 'twitter:tweet:2': {'text': 'two', 'id': '2'}
  132. }],
  133. ['twitter:tweet:1', {'text': 'one', 'id': '1'}],
  134. ['twitter:tweet:2', {'text': 'two', 'id': '2'}],
  135. ['youtube:videos', {
  136. 'youtube:video:3': {'url': 'three.mp4', 'id': '3'},
  137. 'youtube:video:4': {'url': 'four.mp4', 'id': '4'}
  138. }],
  139. ['youtube:video:3', {'url': 'three.mp4', 'id': '3'}],
  140. ['youtube:video:4', {'url': 'four.mp4', 'id': '4'}]
  141. ])
  142. def test_cache ():
  143. h_cs = ContentSystem()
  144. cache_hits = []
  145. cache = {}
  146. def cache_cs (content_id = None, content_ids = None):
  147. if content_id in cache:
  148. content = cache[content_id]
  149. cache_hits.append([content_id, content])
  150. return content
  151. h_cs.register_content_source('twitter:tweet:', tweet_cs)
  152. h_cs.register_content_source('twitter:tweets', tweets_cs, id_pattern='')
  153. h_cs.register_content_source('', cache_cs, id_pattern='(.+)', weight=99999)
  154. def cache_hook (content_id, content):
  155. # FIXME we might need a skip hook mechanism
  156. # this will be invoked even with a cache hit.
  157. # perhaps pass source_id into the hook
  158. if content_id.endswith('s') or content_id in cache:
  159. # FIXME exclusion list/patterns
  160. # we should allow bulk fetches to get cached items
  161. return
  162. cache[content_id] = content
  163. h_cs.register_hook('got_content', cache_hook)
  164. t1 = h_cs.get_content('twitter:tweet:1')
  165. t2 = h_cs.get_content('twitter:tweet:2')
  166. t3 = h_cs.get_content('fake:1')
  167. assert(t1 == {'text': 'one', 'id': '1'})
  168. assert(t2 == None)
  169. assert(t3 == None)
  170. assert(cache_hits == [])
  171. t1_2 = h_cs.get_content('twitter:tweet:1')
  172. t2_2 = h_cs.get_content('twitter:tweet:2')
  173. t3_2 = h_cs.get_content('fake:1')
  174. assert(t1_2 == t1)
  175. assert(t2_2 == None)
  176. assert(t2_2 == None)
  177. print(f'cache_hits = {cache_hits}')
  178. assert(cache_hits == [
  179. ['twitter:tweet:1', {'text': 'one', 'id': '1'}]
  180. ])
  181. def test_cache_bulk ():
  182. h_cs = ContentSystem()
  183. cache_hits = []
  184. cache = {}
  185. def cache_cs (content_id = None, content_ids = None):
  186. if content_id and content_id in cache:
  187. content = cache[content_id]
  188. cache_hits.append([content_id, content])
  189. return content
  190. elif content_ids:
  191. results = {}
  192. for content_id in content_ids:
  193. content = cache_cs(content_id)
  194. if content:
  195. results[content_id] = content
  196. return results
  197. h_cs.register_content_source('twitter:tweet:', tweet_cs)
  198. h_cs.register_content_source('twitter:tweets', tweets_cs, id_pattern='')
  199. h_cs.register_content_source('youtube:video:', video_cs)
  200. h_cs.register_content_source('youtube:videos', videos_cs, id_pattern='')
  201. h_cs.register_content_source('instagram:post:', photo_cs)
  202. h_cs.register_content_source('', cache_cs, id_pattern='(.+)', weight=99999)
  203. def cache_hook (content_id, content):
  204. # FIXME we might need a skip hook mechanism
  205. # this will be invoked even with a cache hit.
  206. # perhaps pass source_id into the hook
  207. if content_id.endswith('s') or content_id in cache:
  208. # FIXME exclusion list/patterns
  209. # we should allow bulk fetches to get cached items
  210. return
  211. cache[content_id] = content
  212. h_cs.register_hook('got_content', cache_hook)
  213. content = h_cs.get_all_content(['twitter:tweet:1', 'twitter:tweet:2',
  214. 'youtube:video:3', 'youtube:video:4',
  215. 'instagram:post:3',
  216. 'fake:1'], enable_bulk_fetch=True)
  217. assert(cache_hits == [])
  218. content2 = h_cs.get_all_content(['twitter:tweet:1', 'twitter:tweet:2',
  219. 'youtube:video:3', 'youtube:video:4',
  220. 'instagram:post:3',
  221. 'fake:1'], enable_bulk_fetch=True)
  222. assert(content == content2)
  223. print(f'cache_hits = {cache_hits}')
  224. assert(cache_hits == [
  225. ['instagram:post:3', {'url': 'three.png', 'id': '3'}],
  226. ['twitter:tweet:1', {'text': 'one', 'id': '1'}],
  227. ['twitter:tweet:2', {'text': 'two', 'id': '2'}],
  228. ['youtube:video:3', {'url': 'three.mp4', 'id': '3'}],
  229. ['youtube:video:4', {'url': 'four.mp4', 'id': '4'}]
  230. ])