summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAki <please@ignore.pl>2022-05-24 23:38:27 +0200
committerAki <please@ignore.pl>2022-05-24 23:39:57 +0200
commit28e237004f599bc3fda78a9d50482319e8b50741 (patch)
treec54c5548b9b83543a1e513a100ea0c5ecd5228e3
parent902e445daf9cc66d88c644899c4e3b7916f95c4f (diff)
downloadsalvager-28e237004f599bc3fda78a9d50482319e8b50741.zip
salvager-28e237004f599bc3fda78a9d50482319e8b50741.tar.gz
salvager-28e237004f599bc3fda78a9d50482319e8b50741.tar.bz2
Committed changes from tinkering
* Documented most of the functions * Divided implementation into more distinct parts * Loosened requirements on system and date regex
-rw-r--r--scrap.py61
1 file changed, 38 insertions(+), 23 deletions(-)
diff --git a/scrap.py b/scrap.py
index 07e4036..769271a 100644
--- a/scrap.py
+++ b/scrap.py
@@ -9,10 +9,13 @@ import requests
OWNER_HREF = re.compile(r"/(?:corporation|alliance)/(\d+)/?")
-SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/$")
+SYSTEM_AND_DATE = re.compile(r"/(\d+)/(\d+)/?$")
def get_hash(kill):
+ """
+ Looks up and returns hash of the *kill* using Zkillboard's API.
+ """
response = requests.get("https://zkillboard.com/api/killID/{}/".format(kill))
response.raise_for_status()
data = response.json()
@@ -22,6 +25,9 @@ def get_hash(kill):
class RelatedParser(HTMLParser):
+ """
+ Reads kill IDs and teams from Zkillboard's related kills page.
+ """
def __init__(self):
super().__init__()
self._team = 0
@@ -37,7 +43,7 @@ class RelatedParser(HTMLParser):
if tag == "tr" and attrs.get("class", "").lower() == "killlistrow" and self._team > 0:
self._flush()
killid = attrs.get("killid", "")
- self._current = (self._team, killid, None)
+ self._current = (killid, self._team, None)
if tag == "a" and self._team > 0 and self._current:
match = OWNER_HREF.match(attrs.get("href", ""))
@@ -52,42 +58,51 @@ class RelatedParser(HTMLParser):
@property
def kills(self):
+ """
+ Returns all kills found by the parser along with their team and the ID of the victim.
+ """
self._flush()
return self._kills
-def main():
- parser = argparse.ArgumentParser()
- parser.add_argument("url")
- parser.add_argument("-o", "--output")
- args = parser.parse_args()
-
- response = requests.get(args.url)
+def get_related_kills(url):
+ response = requests.get(url)
response.raise_for_status()
page = response.text
-
related = RelatedParser()
related.feed(page)
-
killmails = []
teams = (set(), set())
- for team, kill, owner in related.kills:
+ for kill, team, victim in related.kills:
time.sleep(1.05) # Zkillboard is very sensitive.
killmails.append({"id": int(kill), "hash": get_hash(kill)})
destination = teams[team - 1]
- destination.add(int(owner))
+ destination.add(int(victim))
+ return {"killmails": killmails, "teams": list(map(list, teams))}
+
+def output_name(args):
+ """
+ Generates name of the output file based on the CLI *args*.
+ """
if args.output:
- filename = args.output
- else:
- match = SYSTEM_AND_DATE.search(args.url)
- if match:
- filename = "{}_{}.json".format(*match.groups())
- else:
- filename = "scrapped.json"
-
- with open(filename, "w") as file:
- file.write(json.dumps({"killmails": killmails, "teams": tuple(map(list, teams))}))
+ return args.output
+ match = SYSTEM_AND_DATE.search(args.url)
+ if match:
+ return "{}_{}.json".format(*match.groups())
+ return "a.json"
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("url")
+ parser.add_argument("-o", "--output")
+ args = parser.parse_args()
+ snapshot = get_related_kills(args.url)
+ filename = output_name(args)
+ with open(filename, "w") as fd:
+ fd.write(json.dumps(snapshot))
+ fd.write("\n")
if __name__ == "__main__":