summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author W. Kosior <koszko@koszko.org> 2025-01-09 01:17:58 +0100
committer W. Kosior <koszko@koszko.org> 2025-01-09 01:17:58 +0100
commit d63572395f027b7776d57e62d0019800e3c4657d (patch)
tree 4680e6d4e0dbaeb46c630a6d85878deae2a60929
parent 7f5a6f3ce26eae52eb44cda97d41a4f54755e0fa (diff)
download AGH-threat-intel-course-d63572395f027b7776d57e62d0019800e3c4657d.tar.gz
AGH-threat-intel-course-d63572395f027b7776d57e62d0019800e3c4657d.zip
fix motives scraping
-rwxr-xr-x scrape_blackobird_groups_info.py 15
1 files changed, 3 insertions, 12 deletions
diff --git a/scrape_blackobird_groups_info.py b/scrape_blackobird_groups_info.py
index 6f3bae6..43d91ca 100755
--- a/scrape_blackobird_groups_info.py
+++ b/scrape_blackobird_groups_info.py
@@ -86,18 +86,9 @@ def page_to_group(page):
motives = []
- while True:
- node = node.next_sibling
-
- if node.name:
- continue
-
- if node.text == ("\nInformation\xa0") or "\n" in node.text:
- break
-
- new_motives = node.next_sibling.next_sibling.split(",")
- new_motives = [heavy_sanitize(text) for text in new_motives]
- motives.extend(filter(None, new_motives))
+ new_motives = node.next_sibling.next_sibling.split(",")
+ new_motives = [heavy_sanitize(text) for text in new_motives]
+ motives.extend(filter(None, new_motives))
while (node.text != ("\nInformation\xa0")
and not node.text.startswith("\nSectors:")):