import argparse
import json
import re
import sys
import time
from html.parser import HTMLParser

import requests

# Matches an href that starts with a corporation or alliance path; group 1 is
# the numeric ID.
OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?")
# Matches the two trailing numeric path segments of a URL (presumably the
# system ID and the date of a related kills page -- verify against real URLs).
SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/?$")
# Seconds to wait for a response before giving up, so that a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30


def get_hash(kill):
    """
    Looks up and returns hash of the *kill* using Zkillboard's API.

    Raises ``requests.HTTPError`` if the API answers with an error status and
    ``ValueError`` unless the API returns exactly one record for the *kill*.
    """
    response = requests.get(
        "https://zkillboard.com/api/killID/{}/".format(kill),
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    data = response.json()
    # An empty answer would otherwise surface as a bare IndexError below.
    if len(data) != 1:
        raise ValueError(
            "expected exactly one record for kill {}, got {}".format(kill, len(data))
        )
    return data[0]["zkb"]["hash"]


class RelatedParser(HTMLParser):
    """
    Reads kill IDs and teams from Zkillboard's related kills page.

    Every ``<table id="killlist">`` starts a new team (numbered from 1). Inside
    a team, every ``<tr class="killlistrow">`` starts a kill, and the first
    corporation or alliance link that follows is taken as the kill's victim.
    """

    def __init__(self):
        super().__init__()
        # Number of kill list tables seen so far; 0 means none yet.
        self._team = 0
        # Completed rows as (kill ID, team, victim ID) tuples.
        self._kills = set()
        # Row being assembled, or None when outside of a row.
        self._current = None

    def handle_starttag(self, tag, attrs):
        # HTMLParser reports valueless attributes (e.g. ``<a href>``) as None;
        # normalise them to "" so the string operations below cannot fail.
        attrs = {name: value or "" for name, value in attrs}

        if tag == "table" and attrs.get("id", "").lower() == "killlist":
            self._team += 1

        if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0:
            self._flush()
            # HTMLParser lowercases attribute names, hence "killid".
            killid = attrs.get("killid", "")
            self._current = (killid, self._team, None)

        if tag == "a" and self._team > 0 and self._current:
            match = OWNER_HREF.match(attrs.get("href", ""))
            if match:
                self._current = (*self._current[:2], match.group(1))
                self._flush()

    def _flush(self):
        """
        Stores the current row if it is complete and stops tracking it.
        """
        if self._current and all(self._current):
            self._kills.add(self._current)
            self._current = None

    @property
    def kills(self):
        """
        Returns all kills found by the parser along with their team and the ID
        of the victim.
        """
        self._flush()
        return self._kills


def get_related_kills(url):
    """
    Downloads the related kills page at *url* and returns a snapshot of it.

    The snapshot is a dictionary with ``"killmails"``, a list of
    ``{"id": ..., "hash": ...}`` dictionaries, and ``"teams"``, a list of two
    lists holding the victim IDs found on each side. Both are sorted so that
    repeated runs over the same page produce identical output.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    related = RelatedParser()
    related.feed(response.text)

    killmails = []
    # NOTE: assumes the page holds at most two kill list tables; a third one
    # makes the team lookup below raise IndexError.
    teams = (set(), set())

    # Sets iterate in arbitrary order, so impose one before doing any work.
    ordered = sorted(related.kills, key=lambda row: (int(row[0]),) + row[1:])
    for kill, team, victim in ordered:
        time.sleep(1.05)  # Zkillboard is very sensitive.
        killmails.append({"id": int(kill), "hash": get_hash(kill)})
        teams[team - 1].add(int(victim))

    return {"killmails": killmails, "teams": [sorted(team) for team in teams]}


def output_name(args):
    """
    Generates name of the output file based on the CLI *args*.

    An explicit ``--output`` wins; otherwise the name is built from the two
    trailing numeric segments of the URL, falling back to ``a.json``.
    """
    if args.output:
        return args.output

    match = SYSTEM_AND_DATE.search(args.url)
    if match:
        return "{}_{}.json".format(*match.groups())

    return "a.json"


def main():
    """
    Parses the command line, takes the snapshot and writes it out as JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("url")
    parser.add_argument("-o", "--output")
    args = parser.parse_args()

    # Resolve everything before opening the file so that a failed download
    # does not leave an empty output file behind.
    snapshot = get_related_kills(args.url)
    filename = output_name(args)

    with open(filename, "w", encoding="utf-8") as fd:
        json.dump(snapshot, fd)
        fd.write("\n")


if __name__ == "__main__":
    main()