hace 2 años · 54f77889ed
--- a/2023-05-21/youtube_info_fetch/.gitignore
+++ b/2023-05-21/youtube_info_fetch/.gitignore
@@ -0,0 +1 @@
 
				+.data
			
--- a/2023-05-21/youtube_info_fetch/README.md
+++ b/2023-05-21/youtube_info_fetch/README.md
@@ -0,0 +1,45 @@
 
				+# YouTube Info Fetch
			
 
				+
			
 
				+Given a list of Video IDs, fetch them and store them to a SQLite3 DB cache for 
			
 
				+subsequent fetches.
			
 
				+
			
 
				+Store them by authorized user and with a timestamp, so they can be fetched with 
			
 
				+a max age.
			
 
				+
			
 
				+## Setup
			
 
				+
			
 
				+A YouTube API project with client info stored in `.data/yt-client-secret.json`,
			
 
				+and optionally channel config stored in `.data/session_{channel_id}.json` where
			
 
				+channel_id is configured in `youtube_info_fetch.py` as `AUTH_CHANNEL_ID`.
			
 
				+
			
 
				+A template of the format for `.data/session_{channel_id}.json` config 
			
 
				+can be found in `session_sample.json`. If it's not present then it uses app 
			
 
				+level access, which is sufficient.
			
 
				+
			
 
				+It doesn't handle login or refresh token, which I think an earlier dev practice 
			
 
				+covered. The credentials are the `flow.credentials` from 
			
 
				+`google_auth_oauthlib.flow.Flow.from_client_secrets_file` after `fetch_token` is 
			
 
				+called.
			
 
				+
			
 
				+## Usage
			
 
				+
			
 
				+Run `python youtube_info_fetch.py` after setting `video_ids` and optionally `AUTH_CHANNEL_ID` at the top of the file.
			
 
				+
			
 
				+Add more video_ids between runs to see cache behavior.
			
 
				+
			
 
				+Data is stored in `.data/youtube.db` and can be inspected with any SQLite3 library.
			
 
				+
			
 
				+## Dev Practice
			
 
				+
			
 
				+This is a dev practice that touches on these themes:
			
 
				+
			
 
				+* SQLite3
			
 
				+* Caching
			
 
				+* YouTube API
			
 
				+* Layered Code: Model
			
 
				+
			
 
				+It's about the 5th practice I've done where I've broken the code into a "model" 
			
 
				+layer.
			
 
				+
			
 
				+
			
 
				+
			
--- a/2023-05-21/youtube_info_fetch/db_model.py
+++ b/2023-05-21/youtube_info_fetch/db_model.py
@@ -0,0 +1,66 @@
 
				+import json
			
 
				+
			
 
				+import sqlite3
			
 
				+
			
 
				+DB_PATH = '.data/youtube.db'
			
 
				+
			
 
				+def init_db ( path = DB_PATH ):
			
 
				+    db = sqlite3.connect(path)
			
 
				+    
			
 
				+    cur = db.cursor()
			
 
				+    
			
 
				+    table_exists = cur.execute(f"SELECT count(*) FROM sqlite_master WHERE type='table' AND name='youtube_videos'").fetchone()[0]
			
 
				+    
			
 
				+    if not table_exists:
			
 
				+        
			
 
				+        print(f'creating DB {path}')
			
 
				+        
			
 
				+        cur.execute("""
			
 
				+            create table youtube_videos (
			
 
				+                id text,
			
 
				+                ts timestamp default current_timestamp,
			
 
				+                auth_user_id text,
			
 
				+                data blob
			
 
				+            )
			
 
				+        """)
			
 
				+        
			
 
				+        cur.connection.commit()
			
 
				+    
			
 
				+    cur.close()
			
 
				+
			
 
				+def store_video_infos (vid_infos, auth_user_id=None, db_path=DB_PATH):
			
 
				+    db = sqlite3.connect(db_path)
			
 
				+    
			
 
				+    cur = db.cursor()
			
 
				+    
			
 
				+    for vid_info in vid_infos:
			
 
				+        video_id = vid_info['id']
			
 
				+        cur.execute("""
			
 
				+            INSERT INTO youtube_videos (id, auth_user_id, data) VALUES(?,?,?)
			
 
				+        """, [video_id, auth_user_id, json.dumps(vid_info, indent=2)])
			
 
				+        
			
 
				+    cur.connection.commit()
			
 
				+    cur.close()
			
 
				+
			
 
				+
			
 
				+def load_video_infos (video_ids, auth_user_id=None, db_path=DB_PATH):
			
 
				+    db = sqlite3.connect(db_path)
			
 
				+    
			
 
				+    cur = db.cursor()
			
 
				+    
			
 
				+    params = [auth_user_id] + video_ids
			
 
				+    values_sql = ','.join(['?'] * (len(video_ids)))
			
 
				+    
			
 
				+    yt_video_rows = cur.execute(f"""
			
 
				+            SELECT data 
			
 
				+            FROM youtube_videos 
			
 
				+            WHERE 
			
 
				+                (auth_user_id = ? or auth_user_id is null)
			
 
				+                and id IN ({values_sql})
			
 
				+        """, params).fetchall()
			
 
				+        
			
 
				+    cur.close()
			
 
				+    
			
 
				+    vid_infos = list(map(lambda row: json.loads(row[0]), yt_video_rows))
			
 
				+    
			
 
				+    return vid_infos
			
--- a/2023-05-21/youtube_info_fetch/model.py
+++ b/2023-05-21/youtube_info_fetch/model.py
@@ -0,0 +1,33 @@
 
				+import youtube_model
			
 
				+import db_model
			
 
				+
			
 
				+def get_video_infos (video_ids, youtube_user, db_path):
			
 
				+    """
			
 
				+    Get YouTube video info from the cache DB if we have it, fetching and storing anything we don't already have.
			
 
				+    
			
 
				+    Fetches up to 50 videos due to API limitations and the fact we don't code to handle more.
			
 
				+    
			
 
				+    Is auth aware; returns videos fetched with the app key or auth_user_id.
			
 
				+    """
			
 
				+    auth_user_id = None
			
 
				+    if youtube_user:
			
 
				+        auth_user_id = youtube_user['id']
			
 
				+    
			
 
				+    vid_infos = db_model.load_video_infos(video_ids, auth_user_id, db_path=db_path)
			
 
				+    
			
 
				+    loaded_video_ids = set(map(lambda vid_info: vid_info['id'], vid_infos))
			
 
				+    
			
 
				+    fetch_video_ids = list(set(video_ids) - loaded_video_ids)
			
 
				+    
			
 
				+    if fetch_video_ids:
			
 
				+        print(f'get_video_infos: fetching videos: {fetch_video_ids}')
			
 
				+        fetched_vid_infos = youtube_model.fetch_video_infos(fetch_video_ids, youtube_user=youtube_user)
			
 
				+        
			
 
				+        print(f'get_video_infos: fetched videos, length={len(fetched_vid_infos)}')
			
 
				+        
			
 
				+        db_model.store_video_infos(fetched_vid_infos, auth_user_id=auth_user_id, db_path=db_path)
			
 
				+        
			
 
				+        vid_infos += fetched_vid_infos
			
 
				+    
			
 
				+    
			
 
				+    return vid_infos
			
--- a/2023-05-21/youtube_info_fetch/requirements.txt
+++ b/2023-05-21/youtube_info_fetch/requirements.txt
@@ -0,0 +1,3 @@
 
				+google-api-python-client
			
 
				+google-auth-oauthlib
			
 
				+google-auth-httplib2
			
--- a/2023-05-21/youtube_info_fetch/session_sample.json
+++ b/2023-05-21/youtube_info_fetch/session_sample.json
@@ -0,0 +1,13 @@
 
				+{
			
 
				+	"credentials": {
			
 
				+		"client_id": "",
			
 
				+		"client_secret": "",
			
 
				+		"refresh_token": "",
			
 
				+		"scopes": ["https://www.googleapis.com/auth/youtube.force-ssl"],
			
 
				+		"token": "",
			
 
				+		"token_uri": "https://oauth2.googleapis.com/token"},
			
 
				+		"display_name": "HarlanJI",
			
 
				+		"id": "UCX7yXeFV78vXwMX4uanMldQ",
			
 
				+		"username": "harlanji"
			
 
				+}
			
--- a/2023-05-21/youtube_info_fetch/youtube_info_fetch.py
+++ b/2023-05-21/youtube_info_fetch/youtube_info_fetch.py
@@ -0,0 +1,54 @@
 
				+import os
			
 
				+
			
 
				+import json
			
 
				+
			
 
				+import db_model
			
 
				+import youtube_model
			
 
				+import model
			
 
				+
			
 
				+AUTH_CHANNEL_ID = None
			
 
				+"""
			
 
				+Set to None to use app level access.
			
 
				+
			
 
				+Data fetched without credentials can be more liberally cached and shared among accounts.
			
 
				+
			
 
				+See session_sample.json for the format.
			
 
				+"""
			
 
				+
			
 
				+def main ():
			
 
				+    if not os.path.exists('.data'):
			
 
				+        os.mkdir('.data')
			
 
				+    
			
 
				+    if not os.path.exists('.data/yt-client-secret.json'):
			
 
				+        print('.data/yt-client-secret does not exist. load it up with your youtube client config')
			
 
				+        exit(-1)
			
 
				+    
			
 
				+    db_path = db_model.DB_PATH
			
 
				+    db_model.init_db( db_path )
			
 
				+    
			
 
				+    if not AUTH_CHANNEL_ID:
			
 
				+        print('using app level access')
			
 
				+    
			
 
				+    youtube_user = youtube_model.get_youtube_user(AUTH_CHANNEL_ID)
			
 
				+    
			
 
				+    if AUTH_CHANNEL_ID and not youtube_user:
			
 
				+        print('could not load auth for configured channel. exiting.')
			
 
				+        exit(-1)
			
 
				+    
			
 
				+    
			
 
				+    video_ids = [
			
 
				+        'Z6ih1aKeETk',
			
 
				+        'vE-ViyPXj4Q',
			
 
				+        'jfKfPfyJRdk',
			
 
				+        '5d7e9lj8BQw',
			
 
				+        '_sSBKm-CDNU'
			
 
				+    ]
			
 
				+    
			
 
				+    
			
 
				+    vid_infos = model.get_video_infos(video_ids, youtube_user=youtube_user, db_path=db_path)
			
 
				+    
			
 
				+    print(f'got videos, count={len(vid_infos)}')
			
 
				+    #print(json.dumps(vid_infos, indent=2))
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/2023-05-21/youtube_info_fetch/youtube_model.py
+++ b/2023-05-21/youtube_info_fetch/youtube_model.py
@@ -0,0 +1,54 @@
 
				+import os
			
 
				+
			
 
				+import json
			
 
				+
			
 
				+import google.oauth2.credentials
			
 
				+import googleapiclient.discovery
			
 
				+
			
 
				+SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
			
 
				+API_SERVICE_NAME = 'youtube'
			
 
				+API_VERSION = 'v3'
			
 
				+
			
 
				+VIDEO_PARTS = 'snippet,contentDetails,liveStreamingDetails,statistics,recordingDetails'.split(',')
			
 
				+
			
 
				+
			
 
				+def get_youtube_builder (youtube_user = None):
			
 
				+    
			
 
				+    if youtube_user:
			
 
				+        
			
 
				+        credentials = google.oauth2.credentials.Credentials(
			
 
				+          **youtube_user['credentials'])
			
 
				+        
			
 
				+        youtube = googleapiclient.discovery.build(
			
 
				+          API_SERVICE_NAME, API_VERSION, credentials=credentials)
			
 
				+    else:
			
 
				+        print('using developer key for YT api')
			
 
				+        
			
 
				+        developer_key = os.environ.get('GOOGLE_DEVELOPER_KEY')
			
 
				+        youtube = googleapiclient.discovery.build(
			
 
				+          API_SERVICE_NAME, API_VERSION, developerKey=developer_key) 
			
 
				+    
			
 
				+    return youtube
			
 
				+
			
 
				+def get_youtube_user (channel_id):
			
 
				+    session_path = f'.data/session_{channel_id}.json'
			
 
				+    if os.path.exists(session_path):
			
 
				+        with open(session_path, 'rt', encoding='utf-8') as f:
			
 
				+            return json.load(f)
			
 
				+    else:
			
 
				+        print(f'No config for channel {channel_id}')
			
 
				+        return None
			
 
				+
			
 
				+def fetch_video_infos (video_ids, video_parts=VIDEO_PARTS, max_results=50, youtube_user=None):
			
 
				+    youtube = get_youtube_builder(youtube_user)
			
 
				+    
			
 
				+    # FIXME add pagination
			
 
				+    if len(video_ids) > max_results or max_results > 50:
			
 
				+        raise Exception('requesting more videos than max results or supported and pagination is not supported.')
			
 
				+    
			
 
				+    videos = youtube.videos().list(id=video_ids,
			
 
				+        part=','.join(video_parts),
			
 
				+        maxResults=max_results
			
 
				+        ).execute()
			
 
				+    
			
 
				+    return videos['items']