diff options
Diffstat (limited to 'scrap.py')
-rw-r--r-- | scrap.py | 77 |
1 files changed, 3 insertions, 74 deletions
@@ -3,92 +3,21 @@ import json import re import sys import time -from html.parser import HTMLParser - -import requests from salvager import esi +from salvager import zkill -OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?") SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/?$") -def get_hash(kill): - """ - Looks up and returns hash of the *kill* using Zkillboard's API. - """ - response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill)) - response.raise_for_status() - data = response.json() - if len(data) > 1: - raise ValueError() - return data[0]['zkb']['hash'] - - -class RelatedParser(HTMLParser): - """ - Reads kill IDs and teams from Zkillboard's related kills page. - """ - def __init__(self): - super().__init__() - self._team = 0 - self._kills = set() - self._current = None - - def handle_starttag(self, tag, attrs): - attrs = dict(attrs) - if tag == "table" and attrs.get("id", "").lower() == "killlist": - self._team += 1 - if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0: - self._flush() - killid = attrs.get("killid", "") - self._current = (killid, self._team, None) - if tag == "a" and self._team > 0 and self._current: - match = OWNER_HREF.match(attrs.get("href", "")) - if match: - self._current = (*self._current[:2], match.group(1)) - self._flush() - - def _flush(self): - if self._current and all(self._current): - self._kills.add(self._current) - self._current = None - - @property - def kills(self): - """ - Returns all kills found by the parser along with their team and the ID of the victim. - """ - self._flush() - return self._kills - - -def get_related_kills(url): - """ - Builds basic snapshot containing all killmails from battle report at *url*. - """ - response = requests.get(url) - response.raise_for_status() - page = response.text - related = RelatedParser() - related.feed(page) - killmails = [] - teams = (set(), set()) - for kill, team, victim in related.kills: - killmails.append({'killmail_id': int(kill)}) - destination = teams[team - 1] - destination.add(int(victim)) - return {'killmails': killmails, 'teams': list(map(list, teams))} - - def expand_hashes(snapshot): """ Expands killmails in *snapshot* IN PLACE by adding their hash based on information from Zkillboard. """ for killmail in snapshot['killmails']: time.sleep(1.05) # Zkillboard is very sensitive. - killmail['hash'] = get_hash(killmail['killmail_id']) + killmail['hash'] = zkill.hash(killmail['killmail_id']) return snapshot @@ -126,7 +55,7 @@ def main(): parser.add_argument("-o", "--output") parser.add_argument("--pretty", action='store_true') args = parser.parse_args() - snapshot = get_related_kills(args.url) + snapshot = zkill.parse_battle_report(args.url) expand_hashes(snapshot) expand_details(snapshot) snapshot['ships'] = get_ships(snapshot) |