Added socket timeout of two minutes

2022-01-30 07:11:15 +01:00
parent 7c9caf99c2
commit a6cbc0f4ae
1 changed files with 61 additions and 52 deletions
--- a/grab.py
+++ b/grab.py
@@ -8,6 +8,7 @@ import re
 from pathlib import Path
 from datetime import datetime
 import time
+import socket

 # SYNPOSIS:
 # To download posts from an artist:
@@ -169,6 +170,8 @@ image_request_headers = [
    ('accept-language', 'de-DE,de;q=0.9')
 ]

+# 2 minute timeout in case something gets stuck.
+socket.setdefaulttimeout(120)

 artist_name = str.lower(sys.argv[1])

@@ -186,60 +189,66 @@ Path("./logs/").mkdir(parents=True, exist_ok=True)
 # Request project info for artist
 lastPageReached = False
 pageCounter = 1
-while not lastPageReached:
-    logMsg(f"Fetching page {pageCounter} of {artist_name}...", "okndl")
-    projects_data = requests.get(f"https://www.artstation.com/users/{artist_name}/projects.json?page={pageCounter}", headers=project_fetch_headers)
-    projects = projects_data.json()["data"]
+try:
+    while not lastPageReached:
+        logMsg(f"Fetching page {pageCounter} of {artist_name}...", "okndl")
+        projects_data = requests.get(f"https://www.artstation.com/users/{artist_name}/projects.json?page={pageCounter}", headers=project_fetch_headers)
+        projects = projects_data.json()["data"]

-    page_num_projects = len(projects)
+        page_num_projects = len(projects)

-    lastPageReached = page_num_projects < 50 # Each full page contains 50 projects. If it has less than 50, it is the last page
+        lastPageReached = page_num_projects < 50 # Each full page contains 50 projects. If it has less than 50, it is the last page

-    if not lastPageReached:
-        pageCounter = pageCounter + 1
-        logMsg(f"Page contains {page_num_projects} projects...", "okndl")
-    else:
-        logMsg(f"Page contains {page_num_projects} projects... That's the last page!", "okndl")
-
-
-    # For each project in all of the artists projects
-    for project in projects:
-        project_name    = project["title"]
-        project_hash_id = project["hash_id"]
-
-        logMsg(f"Found project '{project_name}' with id {project_hash_id}. Fetching more info about it...", "okndl")
-
-        # Have we already downloaded this post?
-        if not isPostAlreadySaved(project_hash_id):
-
-            # Fetch information about the project
-            project_info = requests.get(f"https://www.artstation.com/projects/{project_hash_id}.json", headers=project_fetch_headers)
-            assets = project_info.json()["assets"]
-
-            # For each asset in the project (might be multiple images)
-            for asset in assets:
-                asset_type = asset["asset_type"]
-
-                # If the asset is an image
-                if asset_type == "image":
-                    asset_image_url = asset["image_url"]
-                    asset_position = asset["position"]
-                    
-                    # Generate a download filename
-                    filename = artist_directory + slugify(project_name[:60] + "_" + project_hash_id + "_" + str(asset_position)) + "." + extensionFromUrl(asset_image_url)
-
-                    logMsg(f"Found image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Downloading to '{filename}'...", "okdl")
-
-                    # Download it
-                    downloadMedia(asset_image_url, filename)
-                else:
-                    logMsg(f"Found non-image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Skipping...", "okdl")
-
-            # After downloading all assets, mark the project as downloaded.
-            markPostAsSaved(project_hash_id)
-
-        # Project is already downloaded
+        if not lastPageReached:
+            pageCounter = pageCounter + 1
+            logMsg(f"Page contains {page_num_projects} projects...", "okndl")
        else:
-            logMsg(f"Skipping project '{project_name}' [{project_hash_id}] because it is already downloaded.", "okndl")
+            logMsg(f"Page contains {page_num_projects} projects... That's the last page!", "okndl")

-logMsg(f"Finished all pages of {artist_name}... Total pages of this artist scanned: {pageCounter}", "okndl")
+
+        # For each project in all of the artists projects
+        for project in projects:
+            project_name    = project["title"]
+            project_hash_id = project["hash_id"]
+
+            logMsg(f"Found project '{project_name}' with id {project_hash_id}. Fetching more info about it...", "okndl")
+
+            # Have we already downloaded this post?
+            if not isPostAlreadySaved(project_hash_id):
+
+                # Fetch information about the project
+                project_info = requests.get(f"https://www.artstation.com/projects/{project_hash_id}.json", headers=project_fetch_headers)
+                assets = project_info.json()["assets"]
+
+                # For each asset in the project (might be multiple images)
+                for asset in assets:
+                    asset_type = asset["asset_type"]
+
+                    # If the asset is an image
+                    if asset_type == "image":
+                        asset_image_url = asset["image_url"]
+                        asset_position = asset["position"]
+                        
+                        # Generate a download filename
+                        filename = artist_directory + slugify(project_name[:60] + "_" + project_hash_id + "_" + str(asset_position)) + "." + extensionFromUrl(asset_image_url)
+
+                        logMsg(f"Found image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Downloading to '{filename}'...", "okdl")
+
+                        # Download it
+                        downloadMedia(asset_image_url, filename)
+                    else:
+                        logMsg(f"Found non-image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Skipping...", "okdl")
+
+                # After downloading all assets, mark the project as downloaded.
+                markPostAsSaved(project_hash_id)
+
+            # Project is already downloaded
+            else:
+                logMsg(f"Skipping project '{project_name}' [{project_hash_id}] because it is already downloaded.", "okndl")
+
+    logMsg(f"Finished all pages of {artist_name}... Total pages of this artist scanned: {pageCounter}", "okndl")
+
+except socket.timeout:
+    logMsg("Socket timeout of two minutes reached! We'll get 'em next time, boys!", "err")
+except:
+    logMsg("Failed for some reason!", "err")