2022-04-07 13:53:09 +02:00
#!/bin/python3
2022-01-30 03:38:53 +01:00
import requests
import mimetypes
import sys
from pathlib import Path
from datetime import datetime
import time
2022-01-30 07:11:15 +01:00
import socket
2022-01-30 03:38:53 +01:00
2022-01-30 19:19:50 +01:00
from util import *
from headers import *
2022-01-30 03:38:53 +01:00
# SYNOPSIS:
# To download posts from an artist:
2022-01-30 21:02:18 +01:00
# python3 grab-artist.py mixppl
2022-01-30 03:38:53 +01:00
2022-01-30 07:11:15 +01:00
# Require the artist name up front so that a missing argument produces a
# usage hint instead of an IndexError traceback.
if len(sys.argv) < 2:
    sys.exit("Usage: python3 grab-artist.py <artist-name>")

# 2 minute timeout in case something gets stuck.
socket.setdefaulttimeout(120)

# The artist name is lower-cased once here and used in that form everywhere.
artist_name = sys.argv[1].lower()

# Create artist directory if it doesn't exist.
# Kept as a string with a trailing slash because the download code builds
# file names by concatenating onto it.
artist_directory = "./downloads/" + slugify(artist_name) + "/"
Path(artist_directory).mkdir(parents=True, exist_ok=True)

# Create directory for already saved posts
Path("./already_saved/").mkdir(parents=True, exist_ok=True)

# Create directory for logging
Path("./logs/").mkdir(parents=True, exist_ok=True)
# Request project info for artist, one page at a time, and download every
# image asset of every project that has not been saved yet.

# Each full page contains 50 projects. If it has less than 50, it is the last page.
PROJECTS_PER_PAGE = 50

# requests only times out when `timeout` is passed explicitly, so every HTTP
# call below gets its own two minute limit.
REQUEST_TIMEOUT_SECONDS = 120

lastPageReached = False
pageCounter = 1

try:
    while not lastPageReached:
        logMsg(f"Fetching page {pageCounter} of {artist_name}...", "okndl", artist_name)
        projects_data = requests.get(
            f"https://www.artstation.com/users/{artist_name}/projects.json?page={pageCounter}",
            headers=project_fetch_headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        # Surface HTTP errors (unknown artist, rate limiting, ...) as such
        # instead of as a confusing JSON/KeyError further down.
        projects_data.raise_for_status()
        projects = projects_data.json()["data"]

        page_num_projects = len(projects)
        lastPageReached = page_num_projects < PROJECTS_PER_PAGE

        if not lastPageReached:
            pageCounter = pageCounter + 1
            logMsg(f"Page contains {page_num_projects} projects...", "okndl", artist_name)
        else:
            logMsg(f"Page contains {page_num_projects} projects... That's the last page!", "okndl", artist_name)

        # For each project in all of the artists projects
        for project in projects:
            project_name = project["title"]
            project_hash_id = project["hash_id"]

            logMsg(
                f"Found project '{project_name}' with id {project_hash_id}. Fetching more info about it...",
                "okndl",
                artist_name,
            )

            # Have we already downloaded this post? Then there is nothing to do.
            if isPostAlreadySaved(project_hash_id, artist_name):
                logMsg(
                    f"Skipping project '{project_name}' [{project_hash_id}] because it is already downloaded.",
                    "okndl",
                    artist_name,
                )
                continue

            # Fetch information about the project
            project_info = requests.get(
                f"https://www.artstation.com/projects/{project_hash_id}.json",
                headers=project_fetch_headers,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            project_info.raise_for_status()
            assets = project_info.json()["assets"]

            # For each asset in the project (might be multiple images)
            for asset in assets:
                asset_type = asset["asset_type"]
                asset_position = asset["position"]

                # Only image assets are downloaded; everything else is logged and skipped.
                if asset_type != "image":
                    logMsg(
                        f"Found non-image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Skipping...",
                        "okdl",
                        artist_name,
                    )
                    continue

                asset_image_url = asset["image_url"]

                # Generate a download filename. The title is cut to 60
                # characters and combined with the hash id and the asset
                # position before being slugified.
                filename = (
                    artist_directory
                    + slugify(project_name[:60] + "_" + project_hash_id + "_" + str(asset_position))
                    + "."
                    + extensionFromUrl(asset_image_url)
                )

                logMsg(
                    f"Found image-asset for project '{project_name}' [{project_hash_id}] at position {asset_position}. Downloading to '{filename}'...",
                    "okdl",
                    artist_name,
                )

                # Download it
                downloadMedia(asset_image_url, filename)

            # After downloading all assets, mark the project as downloaded.
            markPostAsSaved(project_hash_id, artist_name)

    logMsg(
        f"Finished all pages of {artist_name}... Total pages of this artist scanned: {pageCounter}",
        "okndl",
        artist_name,
    )

except (socket.timeout, requests.exceptions.Timeout):
    # requests reports its own timeouts as requests.exceptions.Timeout rather
    # than socket.timeout, so both are handled here.
    logMsg("Socket timeout of two minutes reached! We'll get 'em next time, boys!", "err", artist_name)
except BaseException as exc:
    # Deliberately broad: this is the script's top-level boundary, and every
    # failure (including an interrupt) is written to the artist's log.
    logMsg("Failed for some reason!: " + repr(exc), "err", artist_name)