Added socket timeout of two minutes
This commit is contained in:
parent
7c9caf99c2
commit
a6cbc0f4ae
113
grab.py
113
grab.py
@ -8,6 +8,7 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import time
|
import time
|
||||||
|
import socket
|
||||||
|
|
||||||
# SYNPOSIS:
|
# SYNPOSIS:
|
||||||
# To download posts from an artist:
|
# To download posts from an artist:
|
||||||
@ -169,6 +170,8 @@ image_request_headers = [
|
|||||||
('accept-language', 'de-DE,de;q=0.9')
|
('accept-language', 'de-DE,de;q=0.9')
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# 2 minute timeout in case something gets stuck.
|
||||||
|
socket.setdefaulttimeout(120)
|
||||||
|
|
||||||
artist_name = str.lower(sys.argv[1])
|
artist_name = str.lower(sys.argv[1])
|
||||||
|
|
||||||
@ -186,60 +189,66 @@ Path("./logs/").mkdir(parents=True, exist_ok=True)
|
|||||||
# Request project info for artist
|
# Request project info for artist
|
||||||
lastPageReached = False
|
lastPageReached = False
|
||||||
pageCounter = 1
|
pageCounter = 1
|
||||||
while not lastPageReached:
|
try:
|
||||||
logMsg(f"Fetching page {pageCounter} of {artist_name}...", "okndl")
|
while not lastPageReached:
|
||||||
projects_data = requests.get(f"https://www.artstation.com/users/{artist_name}/projects.json?page={pageCounter}", headers=project_fetch_headers)
|
logMsg(f"Fetching page {pageCounter} of {artist_name}...", "okndl")
|
||||||
projects = projects_data.json()["data"]
|
projects_data = requests.get(f"https://www.artstation.com/users/{artist_name}/projects.json?page={pageCounter}", headers=project_fetch_headers)
|
||||||
|
projects = projects_data.json()["data"]
|
||||||
|
|
||||||
page_num_projects = len(projects)
|
page_num_projects = len(projects)
|
||||||
|
|
||||||
lastPageReached = page_num_projects < 50 # Each full page contains 50 projects. If it has less than 50, it is the last page
|
lastPageReached = page_num_projects < 50 # Each full page contains 50 projects. If it has less than 50, it is the last page
|
||||||
|
|
||||||
if not lastPageReached:
|
if not lastPageReached:
|
||||||
pageCounter = pageCounter + 1
|
pageCounter = pageCounter + 1
|
||||||
logMsg(f"Page contains {page_num_projects} projects...", "okndl")
|
logMsg(f"Page contains {page_num_projects} projects...", "okndl")
|
||||||
else:
|
|
||||||
logMsg(f"Page contains {page_num_projects} projects... That's the last page!", "okndl")
|
|
||||||
|
|
||||||
|
|
||||||
# For each project in all of the artists projects
|
|
||||||
for project in projects:
|
|
||||||
project_name = project["title"]
|
|
||||||
project_hash_id = project["hash_id"]
|
|
||||||
|
|
||||||
logMsg(f"Found project '{project_name}' with id {project_hash_id}. Fetching more info about it...", "okndl")
|
|
||||||
|
|
||||||
# Have we already downloaded this post?
|
|
||||||
if not isPostAlreadySaved(project_hash_id):
|
|
||||||
|
|
||||||
# Fetch information about the project
|
|
||||||
project_info = requests.get(f"https://www.artstation.com/projects/{project_hash_id}.json", headers=project_fetch_headers)
|
|
||||||
assets = project_info.json()["assets"]
|
|
||||||
|
|
||||||
# For each asset in the project (might be multiple images)
|
|
||||||
for asset in assets:
|
|
||||||
asset_type = asset["asset_type"]
|
|
||||||
|
|
||||||
# If the asset is an image
|
|
||||||
if asset_type == "image":
|
|
||||||
asset_image_url = asset["image_url"]
|
|
||||||
asset_position = asset["position"]
|
|
||||||
|
|
||||||
# Generate a download filename
|
|
||||||
filename = artist_directory + slugify(project_name[:60] + "_" + project_hash_id + "_" + str(asset_position)) + "." + extensionFromUrl(asset_image_url)
|
|
||||||
|
|
||||||
logMsg(f"Found image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Downloading to '{filename}'...", "okdl")
|
|
||||||
|
|
||||||
# Download it
|
|
||||||
downloadMedia(asset_image_url, filename)
|
|
||||||
else:
|
|
||||||
logMsg(f"Found non-image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Skipping...", "okdl")
|
|
||||||
|
|
||||||
# After downloading all assets, mark the project as downloaded.
|
|
||||||
markPostAsSaved(project_hash_id)
|
|
||||||
|
|
||||||
# Project is already downloaded
|
|
||||||
else:
|
else:
|
||||||
logMsg(f"Skipping project '{project_name}' [{project_hash_id}] because it is already downloaded.", "okndl")
|
logMsg(f"Page contains {page_num_projects} projects... That's the last page!", "okndl")
|
||||||
|
|
||||||
logMsg(f"Finished all pages of {artist_name}... Total pages of this artist scanned: {pageCounter}", "okndl")
|
|
||||||
|
# For each project in all of the artists projects
|
||||||
|
for project in projects:
|
||||||
|
project_name = project["title"]
|
||||||
|
project_hash_id = project["hash_id"]
|
||||||
|
|
||||||
|
logMsg(f"Found project '{project_name}' with id {project_hash_id}. Fetching more info about it...", "okndl")
|
||||||
|
|
||||||
|
# Have we already downloaded this post?
|
||||||
|
if not isPostAlreadySaved(project_hash_id):
|
||||||
|
|
||||||
|
# Fetch information about the project
|
||||||
|
project_info = requests.get(f"https://www.artstation.com/projects/{project_hash_id}.json", headers=project_fetch_headers)
|
||||||
|
assets = project_info.json()["assets"]
|
||||||
|
|
||||||
|
# For each asset in the project (might be multiple images)
|
||||||
|
for asset in assets:
|
||||||
|
asset_type = asset["asset_type"]
|
||||||
|
|
||||||
|
# If the asset is an image
|
||||||
|
if asset_type == "image":
|
||||||
|
asset_image_url = asset["image_url"]
|
||||||
|
asset_position = asset["position"]
|
||||||
|
|
||||||
|
# Generate a download filename
|
||||||
|
filename = artist_directory + slugify(project_name[:60] + "_" + project_hash_id + "_" + str(asset_position)) + "." + extensionFromUrl(asset_image_url)
|
||||||
|
|
||||||
|
logMsg(f"Found image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Downloading to '{filename}'...", "okdl")
|
||||||
|
|
||||||
|
# Download it
|
||||||
|
downloadMedia(asset_image_url, filename)
|
||||||
|
else:
|
||||||
|
logMsg(f"Found non-image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Skipping...", "okdl")
|
||||||
|
|
||||||
|
# After downloading all assets, mark the project as downloaded.
|
||||||
|
markPostAsSaved(project_hash_id)
|
||||||
|
|
||||||
|
# Project is already downloaded
|
||||||
|
else:
|
||||||
|
logMsg(f"Skipping project '{project_name}' [{project_hash_id}] because it is already downloaded.", "okndl")
|
||||||
|
|
||||||
|
logMsg(f"Finished all pages of {artist_name}... Total pages of this artist scanned: {pageCounter}", "okndl")
|
||||||
|
|
||||||
|
except socket.timeout:
|
||||||
|
logMsg("Socket timeout of two minutes reached! We'll get 'em next time, boys!", "err")
|
||||||
|
except:
|
||||||
|
logMsg("Failed for some reason!", "err")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user