commit 115720302cf3d7723c47fa9b1a126417a54d9bd7
Author: Nico
Date:   Sun Feb 1 00:04:37 2026 +1100

    sekaipedia-character-card-download: init

diff --git a/sekaipedia-character-card-download/.gitignore b/sekaipedia-character-card-download/.gitignore
new file mode 100644
index 0000000..c10d570
--- /dev/null
+++ b/sekaipedia-character-card-download/.gitignore
@@ -0,0 +1,3 @@
+out
+venv
+__pycache__
diff --git a/sekaipedia-character-card-download/main.py b/sekaipedia-character-card-download/main.py
new file mode 100644
index 0000000..2a33544
--- /dev/null
+++ b/sekaipedia-character-card-download/main.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+from time import sleep
+from bs4 import BeautifulSoup
+import requests
+from platform import python_version
+import platform
+import os
+
+
+useragent = f"sekaipedia character card scraping bot (python {python_version()}, os {platform.system()}, host {platform.node()})"
+outdir = "out"
+print("using user agent:", useragent)
+print("using out dir:", outdir)
+os.makedirs(outdir, exist_ok=True)
+
+print("input the character card url from sekaipedia you want to scrape (e.g. https://www.sekaipedia.org/wiki/Asahina_Mafuyu/Cards)")
+characterUrl = input("> ")
+r = requests.get(characterUrl, headers={"user-agent": useragent})
+if len(r.text) <= 500:
+    # print the response in case the wiki returned an error page
+    print(r.text)
+soup = BeautifulSoup(r.text, 'html.parser')
+
+def downloadWikiImage(title, page, name):
+    # follow the link to the image's wiki page (to get the full-resolution file)
+    cardArt = "https://sekaipedia.org" + str(page.find("a", title=title).get("href"))
+    r = requests.get(cardArt, headers={"user-agent": useragent})
+
+    imagePageSoup = BeautifulSoup(r.text, 'html.parser')
+    imageLink = "https:" + str(imagePageSoup.find("a", class_="internal").get("href"))
+
+    print(f"found high resolution card {title} for {name} at {imageLink}")
+
+    # download the file into out/<card name>/
+    dldir = os.path.join(outdir, name)
+    os.makedirs(dldir, exist_ok=True)
+
+    r = requests.get(imageLink, headers={"user-agent": useragent})
+    with open(os.path.join(dldir, f"{title}.png"), mode="wb") as file:
+        file.write(r.content)
+
+
+for table in soup.find_all(class_="wikitable"):
+    for link in table.find_all('a'):
+        # skip links whose first child is a tag (e.g. a thumbnail); keep plain text links
+        if link.contents[0].name is not None:
+            continue
+
+        # go to the card page
+        cardlink = "https://sekaipedia.org" + str(link.get('href'))
+        r = requests.get(cardlink, headers={"user-agent": useragent})
+
+        cardPageSoup = BeautifulSoup(r.text, 'html.parser')
+        try:
+            cardName = cardPageSoup.find("span", class_="mw-page-title-main").contents[0]
+        except AttributeError:  # fall back to the page heading if the title span is missing
+            cardName = cardPageSoup.find("h1", class_="firstHeading").contents[0]
+
+        if os.path.exists(os.path.join(outdir, cardName)):
+            print(f"already downloaded cards for {cardName}")
+            continue
+
+        print("getting cards for:", cardName)
+        downloadWikiImage("Art", cardPageSoup, cardName)
+
+        try:
+            downloadWikiImage("Trained art", cardPageSoup, cardName)
+        except AttributeError:  # find() returned None, so the card has no trained art
+            print(f"Card {cardName} does not have a Trained Art")
+
+        sleep(3)
diff --git a/sekaipedia-character-card-download/requirements.txt b/sekaipedia-character-card-download/requirements.txt
new file mode 100644
index 0000000..d87d9aa
--- /dev/null
+++ b/sekaipedia-character-card-download/requirements.txt
@@ -0,0 +1,8 @@
+beautifulsoup4==4.14.3
+certifi==2026.1.4
+charset-normalizer==3.4.4
+idna==3.11
+requests==2.32.5
+soupsieve==2.8.3
+typing_extensions==4.15.0
+urllib3==2.6.3