remove trailing whitespace

author: W. Kosior <koszko@koszko.org> 2025-01-09 01:18:35 +0100
committer: W. Kosior <koszko@koszko.org> 2025-01-09 01:18:35 +0100
commit: cc93455733b0ee24080e78d1a78b5fe624dc1709 (patch)
tree: e76a306efd0eff7d778ebe8d38ae47073466d28b
parent: d63572395f027b7776d57e62d0019800e3c4657d (diff)
download: AGH-threat-intel-course-cc93455733b0ee24080e78d1a78b5fe624dc1709.tar.gz
AGH-threat-intel-course-cc93455733b0ee24080e78d1a78b5fe624dc1709.zip
1 file changed, 29 insertions, 29 deletions
diff --git a/scrape_blackobird_groups_info.py b/scrape_blackobird_groups_info.py
index 43d91ca..2e99959 100755
--- a/scrape_blackobird_groups_info.py
+++ b/scrape_blackobird_groups_info.py
@@ -25,10 +25,10 @@ class Group:
 
 def sanitize(text):
     text = text.replace("\xa0", "").strip()
-    
+
     if text and text[-1] == "(":
         text = text[:-1].strip()
-    
+
     return text
 
 def heavy_sanitize(text):
@@ -49,20 +49,20 @@ def page_to_group(page):
         node = node.next_sibling
         if node is None:
             return None
-    
+
     name_text = node.previous_sibling.previous_sibling.text
     name = sanitize(name_text.split(",")[0])
     if name in groups_found:
         return None
-    
+
     def incomplete_data_abort(what):
         print(f"Incomplete data for group {name} ({what}).", file=sys.stderr)
-    
+
     aliases = []
-    
+
     while True:
         node = node.next_sibling
-        
+
         if node.name == "i":
             alias = sanitize(node.previous_sibling.text)
             if alias in groups_found:
@@ -70,78 +70,78 @@ def page_to_group(page):
                       file=sys.stderr)
             elif alias and alias != name:
                 aliases.append(alias)
-        
+
         elif node.text == ("\nInformation\xa0"):
             return incomplete_data_abort("no country")
-        
+
         elif node.text == "\nCountry\xa0":
             break
-    
+
     origin = sanitize(node.next_sibling.next_sibling)
-    
+
     while node.text != "\nMotivation\xa0":
         if node.text == ("\nInformation\xa0"):
             return incomplete_data_abort("no motivation")
         node = node.next_sibling
-    
+
     motives = []
-    
+
     new_motives = node.next_sibling.next_sibling.split(",")
     new_motives = [heavy_sanitize(text) for text in new_motives]
     motives.extend(filter(None, new_motives))
-    
+
     while (node.text != ("\nInformation\xa0")
            and not node.text.startswith("\nSectors:")):
         node = node.next_sibling
-    
+
     sectors = []
-    
+
     while True:
         if node.name:
             node = node.next_sibling
             continue
-        
+
         if (node.text == "\nInformation\xa0"
             or "Countries:" in node.text
             or ("\n" in node.text and not node.text.startswith("\nSectors:"))):
             break
-        
+
         sectors_text = node.text
         if "Sectors:" in sectors_text:
             sectors_text = sectors_text.split("Sectors:")[1]
-        
+
         for text in re.split("[.]|;|,|[ \xa0]and|and[ \xa0]", sectors_text):
             sector = heavy_sanitize(text)
             if sector:
                 sectors.append(sector)
-        
+
         node = node.next_sibling
-    
+
     while (node.text != ("\nInformation\xa0")
            and "Countries:" not in node.text):
         node = node.next_sibling
-    
+
     countries = []
-    
+
     while True:
         if node.name:
             node = node.next_sibling
             continue
-        
+
         if node.text == "\nInformation\xa0" or "\n" in node.text:
             break
-        
+
         countries_text = node.text
         if "Countries:" in countries_text:
             countries_text = countries_text.split("Countries:")[1]
-        
+
         for text in re.split("[.]|;|,|[ \xa0]and|and[ \xa0]", countries_text):
             country = heavy_sanitize(text)
             if country:
                 countries.append(country)
-        
+
         node = node.next_sibling
-    
+
     return Group(
         name=name,
         aliases=aliases,
@@ -153,7 +153,7 @@ def page_to_group(page):
 
 for page in all_pages:
     group = page_to_group(page)
-    
+
     if group:
         for key in [group.name] + group.aliases:
             groups_found[key] = group
author	W. Kosior <koszko@koszko.org>	2025-01-09 01:18:35 +0100
committer	W. Kosior <koszko@koszko.org>	2025-01-09 01:18:35 +0100
commit	cc93455733b0ee24080e78d1a78b5fe624dc1709 (patch)
tree	e76a306efd0eff7d778ebe8d38ae47073466d28b
parent	d63572395f027b7776d57e62d0019800e3c4657d (diff)
download	AGH-threat-intel-course-cc93455733b0ee24080e78d1a78b5fe624dc1709.tar.gz AGH-threat-intel-course-cc93455733b0ee24080e78d1a78b5fe624dc1709.zip