Explorar el Código

added 2023-05-21/youtube_info_fetch practice: sqlite3, caching, youtube api, layered code: model

Harlan J. Iverson hace 1 año
padre
commit
54f77889ed

+ 1 - 0
2023-05-21/youtube_info_fetch/.gitignore

@@ -0,0 +1 @@
+.data

+ 45 - 0
2023-05-21/youtube_info_fetch/README.md

@@ -0,0 +1,45 @@
+# YouTube Info Fetch
+
+Given a list of Video IDs, fetch them and store them to a SQLite3 DB cache for 
+subsequent fetches.
+
+Store them by authorized user and with a timestamp, so they can be fetched with 
+a max age.
+
+## Setup
+
+You need a YouTube API project with client info stored in `.data/yt-client-secret.json`,
+and optionally channel config stored in `.data/session_{channel_id}.json` where
+channel_id is configured in `youtube_info_fetch.py` as `AUTH_CHANNEL_ID`.
+
+A template of the format for `.data/session_{channel_id}.json` config 
+can be found in `session_sample.json`. If it's not present then it uses app 
+level access (with the developer key from the `GOOGLE_DEVELOPER_KEY` environment variable), which is sufficient.
+
+It doesn't handle login or token refresh, which I think an earlier dev practice 
+covered. The credentials are the `flow.credentials` from 
+`google_auth_oauthlib.flow.Flow.from_client_secrets_file` after `fetch_token` is 
+called.
+
+## Usage
+
+Run `python youtube_info_fetch.py` after setting `video_ids` in `main()` and optionally `AUTH_CHANNEL_ID` at the top of the file.
+
+Add more video_ids between runs to see cache behavior.
+
+Data is stored in `.data/youtube.db` and can be inspected with any SQLite3 library.
+
+## Dev Practice
+
+This is a dev practice that touches on these themes:
+
+* SQLite3
+* Caching
+* YouTube API
+* Layered Code: Model
+
+It's about the 5th practice I've done where I've broken the code into a "model" 
+layer.
+
+
+

+ 66 - 0
2023-05-21/youtube_info_fetch/db_model.py

@@ -0,0 +1,66 @@
+import json
+
+import sqlite3
+
+# Default SQLite3 DB file; used as the default path argument by the functions in this module.
+DB_PATH = '.data/youtube.db'
+
+def init_db ( path = DB_PATH ):
+    """
+    Create the `youtube_videos` table in the SQLite3 DB at `path` if it does not exist yet.
+
+    Each row holds a video `id`, a `ts` that defaults to the insert time, the
+    `auth_user_id` given when the row was stored and the video info in `data`.
+
+    path: path of the SQLite3 DB file.
+
+    The connection is closed before returning, also when a statement fails.
+    """
+    db = sqlite3.connect(path)
+
+    try:
+        cur = db.cursor()
+
+        table_exists = cur.execute(
+            "SELECT count(*) FROM sqlite_master WHERE type='table' AND name='youtube_videos'"
+        ).fetchone()[0]
+
+        if not table_exists:
+
+            print(f'creating DB {path}')
+
+            cur.execute("""
+                create table youtube_videos (
+                    id text,
+                    ts timestamp default current_timestamp,
+                    auth_user_id text,
+                    data blob
+                )
+            """)
+
+            db.commit()
+
+        cur.close()
+    finally:
+        # Always release the DB file handle, even if the check or create failed.
+        db.close()
+
+def store_video_infos (vid_infos, auth_user_id=None, db_path=DB_PATH):
+    """
+    Insert one `youtube_videos` row per video info and commit them together.
+
+    vid_infos: iterable of dicts that each have an `id` key; the whole dict is
+        stored as JSON text in the `data` column.
+    auth_user_id: stored with every row; None is stored as null.
+    db_path: path of the SQLite3 DB file.
+
+    Raises KeyError if a video info has no `id`. The connection is closed
+    before returning, also when an insert fails.
+    """
+    # Build all rows first so a malformed video info fails before the DB is touched.
+    rows = [
+        (vid_info['id'], auth_user_id, json.dumps(vid_info, indent=2))
+        for vid_info in vid_infos
+    ]
+
+    db = sqlite3.connect(db_path)
+
+    try:
+        cur = db.cursor()
+
+        cur.executemany("""
+            INSERT INTO youtube_videos (id, auth_user_id, data) VALUES(?,?,?)
+        """, rows)
+
+        db.commit()
+        cur.close()
+    finally:
+        # Closing without a commit discards any uncommitted inserts.
+        db.close()
+
+
+def load_video_infos (video_ids, auth_user_id=None, db_path=DB_PATH):
+    """
+    Load stored video infos for the given video ids.
+
+    Returns the rows stored with `auth_user_id` plus the rows stored with a
+    null auth_user_id, each decoded from JSON into a dict.
+
+    video_ids: iterable of video id strings; an empty one returns [] without
+        touching the DB.
+    auth_user_id: id of the authorized user, or None to only match null rows.
+    db_path: path of the SQLite3 DB file.
+
+    NOTE(review): nothing here de-duplicates; an id that was stored more than
+    once yields more than one entry.
+    """
+    # Accept any iterable (tuple, set, ...), not only lists.
+    video_ids = list(video_ids)
+
+    if not video_ids:
+        return []
+
+    params = [auth_user_id] + video_ids
+    # Only '?' placeholders are interpolated into the SQL; the values are bound.
+    values_sql = ','.join(['?'] * len(video_ids))
+
+    db = sqlite3.connect(db_path)
+
+    try:
+        cur = db.cursor()
+
+        yt_video_rows = cur.execute(f"""
+                SELECT data 
+                FROM youtube_videos 
+                WHERE 
+                    (auth_user_id = ? or auth_user_id is null)
+                    and id IN ({values_sql})
+            """, params).fetchall()
+
+        cur.close()
+    finally:
+        db.close()
+
+    return [json.loads(row[0]) for row in yt_video_rows]

+ 33 - 0
2023-05-21/youtube_info_fetch/model.py

@@ -0,0 +1,33 @@
+import youtube_model
+import db_model
+
+def get_video_infos (video_ids, youtube_user, db_path):
+    """
+    Get YouTube video info from the cache DB if we have it, fetching and storing anything we don't already have.
+    
+    Fetches up to 50 videos due to API limitations and the fact we don't code to handle more.
+    
+    Is auth aware; returns videos fetched with the app key or auth_user_id.
+    
+    video_ids: list of video id strings.
+    youtube_user: dict with an `id` key for the authorized user, or a falsy value for app level access.
+    db_path: path of the SQLite3 cache DB.
+    
+    Returns the cached video infos followed by the newly fetched ones.
+    """
+    auth_user_id = youtube_user['id'] if youtube_user else None
+    
+    vid_infos = db_model.load_video_infos(video_ids, auth_user_id, db_path=db_path)
+    
+    loaded_video_ids = {vid_info['id'] for vid_info in vid_infos}
+    
+    # dict.fromkeys drops duplicate ids while keeping the caller's order, so the
+    # fetch request is the same from run to run (a set difference is unordered).
+    fetch_video_ids = [
+        video_id for video_id in dict.fromkeys(video_ids)
+        if video_id not in loaded_video_ids
+    ]
+    
+    if fetch_video_ids:
+        print(f'get_video_infos: fetching videos: {fetch_video_ids}')
+        fetched_vid_infos = youtube_model.fetch_video_infos(fetch_video_ids, youtube_user=youtube_user)
+        
+        print(f'get_video_infos: fetched videos, length={len(fetched_vid_infos)}')
+        
+        db_model.store_video_infos(fetched_vid_infos, auth_user_id=auth_user_id, db_path=db_path)
+        
+        vid_infos += fetched_vid_infos
+    
+    
+    return vid_infos

+ 3 - 0
2023-05-21/youtube_info_fetch/requirements.txt

@@ -0,0 +1,3 @@
+google-api-python-client
+google-auth-oauthlib
+google-auth-httplib2

+ 13 - 0
2023-05-21/youtube_info_fetch/session_sample.json

@@ -0,0 +1,13 @@
+{
+	"credentials": {
+		"client_id": "",
+		"client_secret": "",
+		"refresh_token": "",
+		"scopes": ["https://www.googleapis.com/auth/youtube.force-ssl"],
+		"token": "",
+		"token_uri": "https://oauth2.googleapis.com/token"
+	},
+	"display_name": "HarlanJI",
+	"id": "UCX7yXeFV78vXwMX4uanMldQ",
+	"username": "harlanji"
+}

+ 54 - 0
2023-05-21/youtube_info_fetch/youtube_info_fetch.py

@@ -0,0 +1,54 @@
+import os
+
+import json
+
+import db_model
+import youtube_model
+import model
+
+# Channel id that main() passes to youtube_model.get_youtube_user to load the authorized user.
+AUTH_CHANNEL_ID = None
+"""
+Set to None to use app level access.
+
+Data fetched without credentials can be more liberally cached and shared among accounts.
+
+See session_sample.json for the format.
+"""
+
+def main ():
+    """
+    Script entry point: prepare `.data`, init the cache DB, load the optional
+    authorized user and get the video infos for the hard-coded `video_ids`.
+    
+    Exits with status -1 when the client secret file is missing or when
+    `AUTH_CHANNEL_ID` is set but its session config could not be loaded.
+    """
+    # Local import: sys.exit is always available, unlike the exit() helper that
+    # the site module only installs for interactive use.
+    import sys
+    
+    # exist_ok avoids the check-then-create race of exists() + mkdir().
+    os.makedirs('.data', exist_ok=True)
+    
+    # NOTE(review): the file is only checked for existence here; nothing in
+    # this script reads it.
+    if not os.path.exists('.data/yt-client-secret.json'):
+        print('.data/yt-client-secret.json does not exist. load it up with your youtube client config')
+        sys.exit(-1)
+    
+    db_path = db_model.DB_PATH
+    db_model.init_db( db_path )
+    
+    if not AUTH_CHANNEL_ID:
+        print('using app level access')
+    
+    youtube_user = youtube_model.get_youtube_user(AUTH_CHANNEL_ID)
+    
+    if AUTH_CHANNEL_ID and not youtube_user:
+        print('could not load auth for configured channel. exiting.')
+        sys.exit(-1)
+    
+    
+    video_ids = [
+        'Z6ih1aKeETk',
+        'vE-ViyPXj4Q',
+        'jfKfPfyJRdk',
+        '5d7e9lj8BQw',
+        '_sSBKm-CDNU'
+    ]
+    
+    
+    vid_infos = model.get_video_infos(video_ids, youtube_user=youtube_user, db_path=db_path)
+    
+    print(f'got videos, count={len(vid_infos)}')
+    #print(json.dumps(vid_infos, indent=2))
+
+if __name__ == '__main__':
+    main()

+ 54 - 0
2023-05-21/youtube_info_fetch/youtube_model.py

@@ -0,0 +1,54 @@
+import os
+
+import json
+
+import google.oauth2.credentials
+import googleapiclient.discovery
+
+# OAuth scope; the same value appears under "scopes" in session_sample.json.
+SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
+# Service name and version handed to googleapiclient.discovery.build.
+API_SERVICE_NAME = 'youtube'
+API_VERSION = 'v3'
+
+# Default video parts requested by fetch_video_infos.
+VIDEO_PARTS = 'snippet,contentDetails,liveStreamingDetails,statistics,recordingDetails'.split(',')
+
+
+def get_youtube_builder (youtube_user = None):
+    """
+    Build a YouTube API client.
+    
+    youtube_user: dict whose `credentials` entry holds the keyword arguments
+        for `google.oauth2.credentials.Credentials`; when falsy, the client is
+        built with the developer key from the `GOOGLE_DEVELOPER_KEY`
+        environment variable instead.
+    
+    Returns the client created by `googleapiclient.discovery.build`.
+    """
+    if not youtube_user:
+        print('using developer key for YT api')
+        
+        return googleapiclient.discovery.build(
+          API_SERVICE_NAME,
+          API_VERSION,
+          developerKey=os.environ.get('GOOGLE_DEVELOPER_KEY'))
+    
+    user_credentials = google.oauth2.credentials.Credentials(
+      **youtube_user['credentials'])
+    
+    return googleapiclient.discovery.build(
+      API_SERVICE_NAME, API_VERSION, credentials=user_credentials)
+
+def get_youtube_user (channel_id):
+    """
+    Load the session config for `channel_id` from `.data/session_{channel_id}.json`.
+    
+    Returns the parsed JSON, or None (after printing a notice) when the file
+    does not exist.
+    """
+    session_path = f'.data/session_{channel_id}.json'
+    
+    # Guard clause: no session file means no authorized user.
+    if not os.path.exists(session_path):
+        print(f'No config for channel {channel_id}')
+        return None
+    
+    with open(session_path, 'rt', encoding='utf-8') as session_file:
+        youtube_user = json.load(session_file)
+    
+    return youtube_user
+
+def fetch_video_infos (video_ids, video_parts=VIDEO_PARTS, max_results=50, youtube_user=None):
+    """
+    Fetch video infos for `video_ids` from the YouTube API in a single request.
+    
+    video_ids: list of video id strings; an empty list returns [] without a request.
+    video_parts: names of the parts to request, joined with commas for the API.
+    max_results: upper bound on the number of ids per call; at most 50.
+    youtube_user: passed to get_youtube_builder to pick the credentials.
+    
+    Returns the `items` list of the API response.
+    
+    Raises ValueError when more ids than `max_results` are requested or
+    `max_results` exceeds 50, since pagination is not supported.
+    """
+    # FIXME add pagination
+    # Validate before building the client so bad requests fail without any API setup.
+    if len(video_ids) > max_results or max_results > 50:
+        raise ValueError('requesting more videos than max results or supported and pagination is not supported.')
+    
+    if not video_ids:
+        return []
+    
+    youtube = get_youtube_builder(youtube_user)
+    
+    videos = youtube.videos().list(id=video_ids,
+        part=','.join(video_parts),
+        maxResults=max_results
+        ).execute()
+    
+    return videos['items']