"""Scrape kill IDs from a zKillboard listing page and look up each kill's
hash via the zKillboard API, writing the results out as JSON."""

import argparse
import json
import re
import time

import requests

# Kill links on a zKillboard page look like href="/kill/12345678/".
KILL_HREF = re.compile(r"href=\"/kill/(\d+)/\"")
# Listing URLs end with two numeric path segments (system ID and date).
SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/$")


def unique_kills_in(page):
    """Yield each kill ID found in the page HTML, skipping duplicates."""
    so_far = set()
    for match in KILL_HREF.finditer(page):
        kill = match.group(1)
        if kill not in so_far:
            so_far.add(kill)
            yield kill


def get_hash(kill):
    """Fetch the zkb hash for a single kill ID from the zKillboard API."""
    response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill))
    response.raise_for_status()
    data = response.json()
    if len(data) != 1:
        raise ValueError("expected exactly one record for kill {}".format(kill))
    return data[0]["zkb"]["hash"]


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("url")
    parser.add_argument("-o", "--output")
    args = parser.parse_args()

    response = requests.get(args.url)
    response.raise_for_status()
    page = response.text

    output = []
    for kill in unique_kills_in(page):
        time.sleep(1.05)  # zKillboard rate-limits aggressive clients.
        output.append({"id": kill, "hash": get_hash(kill)})

    if args.output:
        filename = args.output
    else:
        match = SYSTEM_AND_DATE.search(args.url)
        if match:
            filename = "{}_{}.json".format(*match.groups())
        else:
            filename = "scraped.json"

    with open(filename, "w") as file:
        json.dump(output, file)


if __name__ == "__main__":
    main()
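
# Example invocation (a sketch; the script filename and the sample URL are
# assumptions, not part of the original). The page URL appears to be a
# zKillboard related-kills listing whose path ends in /<systemID>/<timestamp>/,
# which is what SYSTEM_AND_DATE expects when deriving the default filename:
#
#   python scrape_kills.py "https://zkillboard.com/related/30000142/201801010100/" -o kills.json
#
# Omitting -o would write 30000142_201801010100.json instead.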