[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [gettor/master] Only parse email subject and body
commit 6a95ed9e0b9cc99cc77567164c117876aeecf376
Author: Cecylia Bocovich <cohosh@xxxxxxxxxxxxxx>
Date: Mon Jan 11 16:41:26 2021 -0500
Only parse email subject and body
Closes issue #75, where the whole message (headers included) was being
parsed for the locale rather than only the subject and the message body.
---
gettor/parse/email.py | 12 +++++-
tests/test_email_service.py | 90 +++++++++++++++++++++++++++++++--------------
2 files changed, 72 insertions(+), 30 deletions(-)
diff --git a/gettor/parse/email.py b/gettor/parse/email.py
index fa945ba..1f70ea3 100644
--- a/gettor/parse/email.py
+++ b/gettor/parse/email.py
@@ -145,9 +145,15 @@ class EmailParser(object):
def build_request(self, msg_str, norm_addr):
# Search for commands keywords
- subject_re = re.compile(r"Subject: (.*)\r\n")
+ subject_re = re.compile("Subject: (.*)\n")
subject = subject_re.search(msg_str)
+ # the body of a message is "a sequence of characters that follows the header
+ # section and is separated from the header section by an empty line"
+ # https://tools.ietf.org/html/rfc5322#section-2.1
+ body_re = re.compile("\r?\n\r?\n(.*)$", re.DOTALL)
+ body = body_re.search(msg_str)
+
request = {
"id": norm_addr,
"command": None,
@@ -161,7 +167,9 @@ class EmailParser(object):
request = self.parse_keywords(subject, request)
# Always parse the body too, to see if there's more specific information
- request = self.parse_keywords(msg_str, request)
+ if body:
+ body = body.group(1)
+ request = self.parse_keywords(body, request)
if not request["language"]:
request["language"] = "en-US"
diff --git a/tests/test_email_service.py b/tests/test_email_service.py
index 47d7e5f..de421a6 100644
--- a/tests/test_email_service.py
+++ b/tests/test_email_service.py
@@ -57,7 +57,7 @@ class EmailServiceTests(unittest.TestCase):
def test_build_request(self):
ep = conftests.EmailParser(self.settings, "gettor@xxxxxxxxxxxxxx")
- msg_str = "From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo: gettor@xxxxxxxxxxxxxx\r\n osx es"
+ msg_str = "From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo: gettor@xxxxxxxxxxxxxx\r\n\r\n osx es"
msg = conftests.message_from_string(msg_str)
ep.locales = ["es", "en"]
request = ep.build_request(msg_str, "hiro@xxxxxxxxxxxxxx")
@@ -81,32 +81,32 @@ class EmailServiceTests(unittest.TestCase):
ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"]
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n\n")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n")
self.assertEqual(request["language"], "en-US")
self.assertEqual(request["command"], "help")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n\n please send me tor\n")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n please send me tor\n")
self.assertEqual(request["language"], "en-US")
self.assertEqual(request["command"], "help")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n\nwindows\n")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\nwindows\n")
self.assertEqual(request["language"], "en-US")
self.assertEqual(request["platform"], "windows")
self.assertEqual(request["command"], "links")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n\n fa\n")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n fa\n")
self.assertEqual(request["language"], "fa")
self.assertEqual(request["command"], "help")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n\n please help me get tor for windows\n")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n please help me get tor for windows\n")
self.assertEqual(request["language"], "en-US")
self.assertEqual(request["command"], "links")
self.assertEqual(request["platform"], "windows")
@@ -116,60 +116,93 @@ class EmailServiceTests(unittest.TestCase):
ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"]
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n osx en")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n osx en")
self.assertEqual(request["language"], "en-US")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n osx ES")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n osx ES")
self.assertEqual(request["language"], "es-ES")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n osx en-US")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n osx en-US")
self.assertEqual(request["language"], "en-US")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n linux fa")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n linux fa")
self.assertEqual(request["language"], "fa")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n osx es")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n osx es")
self.assertEqual(request["language"], "es-ES")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n linux zz")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n linux zz")
self.assertEqual(request["language"], "en-US")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n linux pt-PT")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n linux pt-PT")
self.assertEqual(request["language"], "pt-BR")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: \r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n linux es-AR")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n linux es-AR")
self.assertEqual(request["language"], "es-AR")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: linux es\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n linux es-AR")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n linux es-AR")
self.assertEqual(request["language"], "es-AR")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: linux es\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n linux")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n linux")
self.assertEqual(request["language"], "es-ES")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: linux es-AR\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n linux es")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n linux es")
self.assertEqual(request["language"], "es-AR")
del ep
+ def test_body_subject_parser(self):
+ ep = conftests.EmailParser(self.settings, "gettor@xxxxxxxxxxxxxx")
+ ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa"]
+ request = ep.parse(
+ "To: gettor@xxxxxxxxxxxxxx\r\n"
+ "From: Cecylia Bocovich <cohosh@xxxxxxxxxxxxxx>\r\n"
+ "Subject: windows es\r\n"
+ "Message-ID: <0befc58c-c94d-c262-9597-4365122c04b4@xxxxxxxxxxxxxx>\r\n"
+ "Date: Mon, 11 Jan 2021 11:28:37 -0500\r\n"
+ "MIME-Version: 1.0\r\n"
+ "Content-Type: text/plain; charset=utf-8\r\n"
+ "Content-Language: en-US\r\n"
+ "Content-Transfer-Encoding: 7bit\r\n"
+ "\r\n"
+ "windows es\r\n"
+ )
+ self.assertEqual(request["language"], "es-ES")
+ request = ep.parse(
+ "To: gettor@xxxxxxxxxxxxxx\n"
+ "From: Cecylia Bocovich <cohosh@xxxxxxxxxxxxxx>\n"
+ "Subject: linux fa\n"
+ "Message-ID: <0befc58c-c94d-c262-9597-4365122c04b4@xxxxxxxxxxxxxx>\n"
+ "Date: Mon, 11 Jan 2021 11:28:37 -0500\n"
+ "MIME-Version: 1.0\n"
+ "Content-Type: text/plain; charset=utf-8\n"
+ "Content-Language: en-US\n"
+ "Content-Transfer-Encoding: 7bit\n"
+ "\n"
+ "linux fa\n"
+ )
+ self.assertEqual(request["language"], "fa")
+ del ep
+
@pytest_twisted.inlineCallbacks
def test_sent_links_message(self):
ep = self.sm_client
@@ -216,19 +249,19 @@ class EmailServiceTests(unittest.TestCase):
ep.locales = ["en-US", "es-ES", "es-AR", "pt-BR", "fa", "fr"]
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n osx en\n")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n osx en\n")
self.assertEqual(request["command"], "links")
self.assertEqual(request["language"], "en-US")
self.assertEqual(request["platform"], "osx")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n i like french fries\n")
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n i like french fries\n")
self.assertEqual(request["command"], "help")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\nlinux fa\n\n"
+ "gettor@xxxxxxxxxxxxxx\r\n\r\nlinux fa\n"
"On 2020-02-10 11:54 a.m., gettor@xxxxxxxxxxxxxx wrote:\n"
"> This is how you can request a tor browser bundle link.\n"
">\n"
@@ -237,14 +270,15 @@ class EmailServiceTests(unittest.TestCase):
"> In the body of the email only write: <operating system> <language>.\n"
">\n"
"> We only support windows, osx and linux as operating systems.\n"
- ">\n")
+ ">\n"
+ )
self.assertEqual(request["command"], "links")
self.assertEqual(request["language"], "fa")
self.assertEqual(request["platform"], "linux")
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\nlinux fa\n\n"
+ "gettor@xxxxxxxxxxxxxx\r\n\r\nlinux fa\n"
"On 2020-02-10 11:54 a.m., gettor@xxxxxxxxxxxxxx wrote:\n"
"This is how you can request a tor browser bundle link.\n"
"\n"
@@ -260,7 +294,7 @@ class EmailServiceTests(unittest.TestCase):
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n"
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n"
"On 2020-02-10 11:54 a.m., gettor@xxxxxxxxxxxxxx wrote:\n"
"> This is how you can request a tor browser bundle link.\n"
">\n"
@@ -277,7 +311,7 @@ class EmailServiceTests(unittest.TestCase):
request = ep.parse("From: \"silvia [hiro]\" <hiro@xxxxxxxxxxxxxx>\n"
"Subject: Re: [GetTor] Help Email\r\n Reply-To: hiro@xxxxxxxxxxxxxx \nTo:"
- "gettor@xxxxxxxxxxxxxx\n"
+ "gettor@xxxxxxxxxxxxxx\r\n\r\n"
"On 2020-02-10 11:54 a.m., gettor@xxxxxxxxxxxxxx wrote:\n"
"> This is how you can request a tor browser bundle link.\n"
">\n"
@@ -293,22 +327,22 @@ class EmailServiceTests(unittest.TestCase):
ep = conftests.EmailParser(self.settings, "gettor@xxxxxxxxxxxxxx")
request = ep.parse("From: MAILER-DAEMON@xxxxxxxxxxxxxx\n"
"Subject: Undelivered Mail Returned to Sender\r\n"
- "To: gettor@xxxxxxxxxxxxxx\n osx en\n")
+ "To: gettor@xxxxxxxxxxxxxx\r\n\r\n osx en\n")
self.assertEqual(request, {})
request = ep.parse("From: postmaster@xxxxxxxxxx\n"
"Subject: Undelivered Mail Returned to Sender\r\n"
- "To: gettor@xxxxxxxxxxxxxx\n\n osx en\n")
+ "To: gettor@xxxxxxxxxxxxxx\r\n\r\n osx en\n")
self.assertEqual(request, {})
request = ep.parse("From: gettor@xxxxxxxxxxxxxx\n"
"Subject: links\r\n"
- "To: gettor@xxxxxxxxxxxxxx\n\n osx en\n")
+ "To: gettor@xxxxxxxxxxxxxx\r\n\r\n osx en\n")
self.assertEqual(request, {})
request = ep.parse("From: gettor+en@xxxxxxxxxxxxxx\n"
"Subject: links\r\n"
- "To: gettor@xxxxxxxxxxxxxx\n\n osx en\n")
+ "To: gettor@xxxxxxxxxxxxxx\r\n\r\n osx en\n")
self.assertEqual(request, {})
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits