sekaipedia-character-card-download: init
commit 115720302c
3 changed files with 81 additions and 0 deletions
70	sekaipedia-character-card-download/main.py	Normal file
@@ -0,0 +1,70 @@
#!/usr/bin/env python
from time import sleep
from bs4 import BeautifulSoup
import requests
from platform import python_version
import platform
import os


# identify the scraper to the wiki on every request
useragent = f"sekaipedia character card scraping bot (python {python_version()}, os {platform.system()}, host {platform.node()})"
outdir = "out"
print("using user agent:", useragent)
print("using out dir:", outdir)
os.makedirs(outdir, exist_ok=True)

print("input the character card url from sekaipedia you want to scrape (eg. https://www.sekaipedia.org/wiki/Asahina_Mafuyu/Cards)")
characterUrl = input("> ")
r = requests.get(characterUrl, headers={"user-agent": useragent})
if len(r.text) <= 500:
    # print output in case of failures from the wiki
    print(r.text)
soup = BeautifulSoup(r.text, 'html.parser')
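

# downloadWikiImage: follow the link titled `title` on a card page to the image's
# own wiki page, pick out the full-resolution file link, and save it as out/<name>/<title>.png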
def downloadWikiImage(title, page, name):
    # go to the image's wiki page (to get the full resolution)
    cardArt = "https://sekaipedia.org" + str(page.find("a", title=title).get("href"))
    r = requests.get(cardArt, headers={"user-agent": useragent})

    imagePageSoup = BeautifulSoup(r.text, 'html.parser')
    imageLink = "https:" + str(imagePageSoup.find("a", class_="internal").get("href"))

    print(f"found high resolution card {title} for {name} at {imageLink}")

    # download the file
    dldir = os.path.join(outdir, name)
    os.makedirs(dldir, exist_ok=True)

    r = requests.get(imageLink, headers={"user-agent": useragent})
    with open(os.path.join(dldir, f"{title}.png"), mode="wb") as file:
        file.write(r.content)
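

# walk every wikitable on the listing page; the plain text links in each table
# point at individual card pages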
for table in soup.find_all(class_="wikitable"):
    for link in table.find_all('a'):
        # skip links that wrap other tags (e.g. thumbnails); keep plain text links
        if link.contents[0].name is not None:
            continue

        # go to the card page
        cardlink = "https://sekaipedia.org" + str(link.get('href'))
        r = requests.get(cardlink, headers={"user-agent": useragent})

        cardPageSoup = BeautifulSoup(r.text, 'html.parser')
        try:
            cardName = cardPageSoup.find("span", class_="mw-page-title-main").contents[0]
        except AttributeError:
            cardName = cardPageSoup.find("h1", class_="firstHeading").contents[0]

        if os.path.exists(os.path.join(outdir, cardName)):
            print(f"already downloaded cards for {cardName}")
            continue

        print("getting cards for:", cardName)
        downloadWikiImage("Art", cardPageSoup, cardName)

        try:
            downloadWikiImage("Trained art", cardPageSoup, cardName)
        except AttributeError:
            print(f"Card {cardName} does not have a Trained Art")

        # be gentle with the wiki between card pages
        sleep(3)