[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]

[tor-commits] [gettor/master] Modified email parser to get locales from db



commit 187e27da768d9fda5033da03b6c922a0baaa5d0c
Author: Cecylia Bocovich <cohosh@xxxxxxxxxxxxxx>
Date:   Fri Jan 24 17:26:55 2020 -0500

    Modified email parser to get locales from db
    
    Previously, we conflated the locales available for the Tor Browser
    binaries with the locales available for gettor's email body
    localizations. This patch makes the email parser check our links
    database for the available locales instead.
---
 gettor/parse/email.py       | 19 ++++++++++++++-----
 gettor/utils/db.py          |  8 ++++++++
 scripts/process_email       |  1 +
 tests/test.conf.json        |  2 +-
 tests/test_email_service.py | 11 +++++++++--
 5 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/gettor/parse/email.py b/gettor/parse/email.py
index 4629b7c..56b91bf 100644
--- a/gettor/parse/email.py
+++ b/gettor/parse/email.py
@@ -56,6 +56,7 @@ class EmailParser(object):
         self.settings = settings
         self.dkim = dkim
         self.to_addr = to_addr
+        self.locales = []
 
     def normalize(self, msg):
         # Normalization will convert <Alice Wonderland> alice@xxxxxxxxxxxxxx
@@ -112,7 +113,7 @@ class EmailParser(object):
             return True
 
 
-    def build_request(self, msg_str, norm_addr, languages, platforms):
+    def build_request(self, msg_str, norm_addr, platforms):
         # Search for commands keywords
         subject_re = re.compile(r"Subject: (.*)\r\n")
         subject = subject_re.search(msg_str)
@@ -128,7 +129,7 @@ class EmailParser(object):
         if subject:
             subject = subject.group(1)
             for word in re.split(r"\s+", subject.strip()):
-                if word.lower() in languages:
+                if word.lower() in self.locales:
                     request["language"] = word.lower()
                 if word.lower() in platforms:
                     request["command"] = "links"
@@ -139,7 +140,7 @@ class EmailParser(object):
 
         if not request["command"] or not request["language"]:
             for word in re.split(r"\s+", msg_str.strip()):
-                if word.lower() in languages:
+                if word.lower() in self.locales:
                     request["language"] = word.lower()
                 if word.lower() in platforms:
                     request["command"] = "links"
@@ -159,6 +160,15 @@ class EmailParser(object):
         else:
             return True
 
+    @defer.inlineCallbacks
+    def get_locales(self):
+        dbname = self.settings.get("dbname")
+        conn = SQLite3(dbname)
+
+        locales = yield conn.get_locales()
+        for l in locales:
+            self.locales.append(l[0])
+
 
     def parse(self, msg_str):
         """
@@ -177,7 +187,6 @@ class EmailParser(object):
         log.msg("Building email message from string.", system="email parser")
 
         platforms = self.settings.get("platforms")
-        languages = [*strings.get_locales().keys()]
         msg = message_from_string(msg_str)
 
         name, norm_addr, to_name, norm_to_addr = self.normalize(msg)
@@ -203,7 +212,7 @@ class EmailParser(object):
         except ValueError as e:
             log.msg("DKIM error: {}".format(e.args))
 
-        request = self.build_request(msg_str, norm_addr, languages, platforms)
+        request = self.build_request(msg_str, norm_addr, platforms)
 
         return request
 
diff --git a/gettor/utils/db.py b/gettor/utils/db.py
index 525287b..1ccdf8e 100644
--- a/gettor/utils/db.py
+++ b/gettor/utils/db.py
@@ -104,3 +104,11 @@ class SQLite3(object):
 		return self.dbpool.runQuery(
 			query, (platform, language, status)
 		).addCallback(self.query_callback).addErrback(self.query_errback)
+
+	def get_locales(self):
+		"""
+		Get a list of the supported tor browser binary locales
+		"""
+		query = "SELECT DISTINCT language FROM links"
+		return self.dbpool.runQuery(query
+		).addCallback(self.query_callback).addErrback(self.query_errback)
diff --git a/scripts/process_email b/scripts/process_email
index 37c4e0b..cce7bcc 100755
--- a/scripts/process_email
+++ b/scripts/process_email
@@ -28,6 +28,7 @@ def process_email(message):
 
     try:
         ep = EmailParser(settings, "gettor@xxxxxxxxxxxxxx")
+        yield ep.get_locales().addErrback(ep.parse_errback)
         yield defer.maybeDeferred(
             ep.parse, message
         ).addCallback(ep.parse_callback).addErrback(ep.parse_errback)
diff --git a/tests/test.conf.json b/tests/test.conf.json
index 8f296cc..03bbaf6 100644
--- a/tests/test.conf.json
+++ b/tests/test.conf.json
@@ -1,6 +1,6 @@
 {
   "platforms": ["linux", "osx", "windows"],
-  "dbname": "gettor.db",
+  "dbname": "tests/gettor.db",
   "email_parser_logfile": "email_parser.log",
   "email_requests_limit": 30,
   "twitter_requests_limit": 1,
diff --git a/tests/test_email_service.py b/tests/test_email_service.py
index 187711e..5fa87fc 100644
--- a/tests/test_email_service.py
+++ b/tests/test_email_service.py
@@ -65,9 +65,9 @@ class EmailServiceTests(unittest.TestCase):
         ep = conftests.EmailParser(self.settings, "gettor@xxxxxxxxxxxxxx")
         msg_str = "From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo: gettor@xxxxxxxxxxxxxx\r\n osx es"
         msg = conftests.message_from_string(msg_str)
-        languages = [*self.locales.keys()]
         platforms = self.settings.get('platforms')
-        request = ep.build_request(msg_str, "hiro@xxxxxxxxxxxxxx", languages, platforms)
+        ep.locales = ["es", "en"]
+        request = ep.build_request(msg_str, "hiro@xxxxxxxxxxxxxx", platforms)
         self.assertEqual(request["command"], "links")
         self.assertEqual(request["platform"], "osx")
         self.assertEqual(request["language"], "es")
@@ -83,11 +83,18 @@ class EmailServiceTests(unittest.TestCase):
 
     def test_language_email_parser(self):
         ep = conftests.EmailParser(self.settings, "gettor@xxxxxxxxxxxxxx")
+        ep.locales = ["en", "ru"]
         request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo: gettor@xxxxxxxxxxxxxx\n osx en")
         self.assertEqual(request["command"], "links")
         self.assertEqual(request["platform"], "osx")
         self.assertEqual(request["language"], "en")
 
+        request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo: gettor@xxxxxxxxxxxxxx\n linux ru")
+        self.assertEqual(request["command"], "links")
+        self.assertEqual(request["platform"], "linux")
+        self.assertEqual(request["language"], "ru")
+
+
     def test_sent_links_message(self):
         ep = self.sm_client
         links = self.links



_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits