From 1ad8d912fac79bac70b0e55d75f63ca951921bb3 Mon Sep 17 00:00:00 2001 From: Aki Date: Tue, 24 May 2022 23:47:05 +0200 Subject: Removed scraping script in favour of the separate repository --- .gitignore | 6 ++-- scrap.py | 94 -------------------------------------------------------------- 2 files changed, 2 insertions(+), 98 deletions(-) delete mode 100644 scrap.py diff --git a/.gitignore b/.gitignore index 9ea276c..0c0820e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,2 @@ -__pycache__ -*.json -derelict -.derelict +derelict/ +.derelict/ diff --git a/scrap.py b/scrap.py deleted file mode 100644 index 07e4036..0000000 --- a/scrap.py +++ /dev/null @@ -1,94 +0,0 @@ -import argparse -import json -import re -import sys -import time -from html.parser import HTMLParser - -import requests - - -OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?") -SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/$") - - -def get_hash(kill): - response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill)) - response.raise_for_status() - data = response.json() - if len(data) > 1: - raise ValueError() - return data[0]["zkb"]["hash"] - - -class RelatedParser(HTMLParser): - def __init__(self): - super().__init__() - self._team = 0 - self._kills = set() - self._current = None - - def handle_starttag(self, tag, attrs): - attrs = dict(attrs) - - if tag == "table" and attrs.get("id", "").lower() == "killlist": - self._team += 1 - - if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0: - self._flush() - killid = attrs.get("killid", "") - self._current = (self._team, killid, None) - - if tag == "a" and self._team > 0 and self._current: - match = OWNER_HREF.match(attrs.get("href", "")) - if match: - self._current = (*self._current[:2], match.group(1)) - self._flush() - - def _flush(self): - if self._current and all(self._current): - self._kills.add(self._current) - self._current = None - - @property - def kills(self): - self._flush() - return 
self._kills - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("url") - parser.add_argument("-o", "--output") - args = parser.parse_args() - - response = requests.get(args.url) - response.raise_for_status() - page = response.text - - related = RelatedParser() - related.feed(page) - - killmails = [] - teams = (set(), set()) - for team, kill, owner in related.kills: - time.sleep(1.05) # Zkillboard is very sensitive. - killmails.append({"id": int(kill), "hash": get_hash(kill)}) - destination = teams[team - 1] - destination.add(int(owner)) - - if args.output: - filename = args.output - else: - match = SYSTEM_AND_DATE.search(args.url) - if match: - filename = "{}_{}.json".format(*match.groups()) - else: - filename = "scrapped.json" - - with open(filename, "w") as file: - file.write(json.dumps({"killmails": killmails, "teams": tuple(map(list, teams))})) - - -if __name__ == "__main__": - main() -- cgit v1.1