From a4f7ff63f671f55d408633a4458253c908b16fe3 Mon Sep 17 00:00:00 2001 From: Aki Date: Wed, 25 May 2022 19:01:26 +0200 Subject: Moved zkill interface to own module --- salvager/zkill.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ scrap.py | 77 ++------------------------------------------------- 2 files changed, 86 insertions(+), 74 deletions(-) create mode 100644 salvager/zkill.py diff --git a/salvager/zkill.py b/salvager/zkill.py new file mode 100644 index 0000000..9f30dc1 --- /dev/null +++ b/salvager/zkill.py @@ -0,0 +1,83 @@ +import re +from html.parser import HTMLParser + +import requests + + +OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?") + + +class ZkillError(Exception): + """ + Marks a problem encountered when interfacing with Zkillboard's API. + """ + + +class RelatedParser(HTMLParser): + """ + Reads kill IDs and teams from Zkillboard's related kills page. + """ + def __init__(self): + super().__init__() + self._team = 0 + self._kills = set() + self._current = None + + def handle_starttag(self, tag, attrs): + attrs = dict(attrs) + if tag == "table" and attrs.get("id", "").lower() == "killlist": + self._team += 1 + if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0: + self._flush() + killid = attrs.get("killid", "") + self._current = (killid, self._team, None) + if tag == "a" and self._team > 0 and self._current: + match = OWNER_HREF.match(attrs.get("href", "")) + if match: + self._current = (*self._current[:2], match.group(1)) + self._flush() + + def _flush(self): + if self._current and all(self._current): + self._kills.add(self._current) + self._current = None + + @property + def kills(self): + """ + Returns all kills found by the parser along with their team and the ID of the victim. + """ + self._flush() + return self._kills + + +def hash(kill_id): + """ + Looks up and returns the hash associated with *kill_id* using Zkillboard's API. + """ + response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill_id)) + response.raise_for_status() + data = response.json() + if len(data) == 0: + raise ZkillError("Could not find hash", kill_id) + if len(data) > 1: + raise ZkillError("Too many hashes found", kill_id) + return data[0]['zkb']['hash'] + + +def parse_battle_report(url): + """ + Builds a basic snapshot containing all killmails from the battle report at *url*. + """ + response = requests.get(url) + response.raise_for_status() + page = response.text + related = RelatedParser() + related.feed(page) + killmails = [] + teams = (set(), set()) + for kill, team, victim in related.kills: + killmails.append({'killmail_id': int(kill)}) + destination = teams[team - 1] + destination.add(int(victim)) + return {'killmails': killmails, 'teams': list(map(list, teams))} diff --git a/scrap.py b/scrap.py index 2939451..2f81aa0 100644 --- a/scrap.py +++ b/scrap.py @@ -3,92 +3,21 @@ import json import re import sys import time -from html.parser import HTMLParser - -import requests from salvager import esi +from salvager import zkill -OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?") SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/?$") -def get_hash(kill): - """ - Looks up and returns hash of the *kill* using Zkillboard's API. - """ - response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill)) - response.raise_for_status() - data = response.json() - if len(data) > 1: - raise ValueError() - return data[0]['zkb']['hash'] - - -class RelatedParser(HTMLParser): - """ - Reads kill IDs and teams from Zkillboard's related kills page. - """ - def __init__(self): - super().__init__() - self._team = 0 - self._kills = set() - self._current = None - - def handle_starttag(self, tag, attrs): - attrs = dict(attrs) - if tag == "table" and attrs.get("id", "").lower() == "killlist": - self._team += 1 - if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0: - self._flush() - killid = attrs.get("killid", "") - self._current = (killid, self._team, None) - if tag == "a" and self._team > 0 and self._current: - match = OWNER_HREF.match(attrs.get("href", "")) - if match: - self._current = (*self._current[:2], match.group(1)) - self._flush() - - def _flush(self): - if self._current and all(self._current): - self._kills.add(self._current) - self._current = None - - @property - def kills(self): - """ - Returns all kills found by the parser along with their team and the ID of the victim. - """ - self._flush() - return self._kills - - -def get_related_kills(url): - """ - Builds basic snapshot containing all killmails from battle report at *url*. - """ - response = requests.get(url) - response.raise_for_status() - page = response.text - related = RelatedParser() - related.feed(page) - killmails = [] - teams = (set(), set()) - for kill, team, victim in related.kills: - killmails.append({'killmail_id': int(kill)}) - destination = teams[team - 1] - destination.add(int(victim)) - return {'killmails': killmails, 'teams': list(map(list, teams))} - - def expand_hashes(snapshot): """ Expands killmails in *snapshot* IN PLACE by adding their hash based on information from Zkillboard. """ for killmail in snapshot['killmails']: time.sleep(1.05) # Zkillboard is very sensitive. - killmail['hash'] = get_hash(killmail['killmail_id']) + killmail['hash'] = zkill.hash(killmail['killmail_id']) return snapshot @@ -126,7 +55,7 @@ def main(): parser.add_argument("-o", "--output") parser.add_argument("--pretty", action='store_true') args = parser.parse_args() - snapshot = get_related_kills(args.url) + snapshot = zkill.parse_battle_report(args.url) expand_hashes(snapshot) expand_details(snapshot) snapshot['ships'] = get_ships(snapshot) -- cgit v1.1