diff options
author | W. Kosior <koszko@koszko.org> | 2025-01-09 01:17:58 +0100 |
---|---|---|
committer | W. Kosior <koszko@koszko.org> | 2025-01-09 01:17:58 +0100 |
commit | d63572395f027b7776d57e62d0019800e3c4657d (patch) | |
tree | 4680e6d4e0dbaeb46c630a6d85878deae2a60929 | |
parent | 7f5a6f3ce26eae52eb44cda97d41a4f54755e0fa (diff) | |
download | AGH-threat-intel-course-d63572395f027b7776d57e62d0019800e3c4657d.tar.gz AGH-threat-intel-course-d63572395f027b7776d57e62d0019800e3c4657d.zip |
fix motives scraping
-rwxr-xr-x | scrape_blackobird_groups_info.py | 15 |
1 file changed, 3 insertions, 12 deletions
diff --git a/scrape_blackobird_groups_info.py b/scrape_blackobird_groups_info.py
index 6f3bae6..43d91ca 100755
--- a/scrape_blackobird_groups_info.py
+++ b/scrape_blackobird_groups_info.py
@@ -86,18 +86,9 @@ def page_to_group(page):
 
     motives = []
 
-    while True:
-        node = node.next_sibling
-
-        if node.name:
-            continue
-
-        if node.text == ("\nInformation\xa0") or "\n" in node.text:
-            break
-
-        new_motives = node.next_sibling.next_sibling.split(",")
-        new_motives = [heavy_sanitize(text) for text in new_motives]
-        motives.extend(filter(None, new_motives))
+    new_motives = node.next_sibling.next_sibling.split(",")
+    new_motives = [heavy_sanitize(text) for text in new_motives]
+    motives.extend(filter(None, new_motives))
 
     while (node.text != ("\nInformation\xa0")
            and not node.text.startswith("\nSectors:")):