[Author Prev][Author Next][Thread Prev][Thread Next][Author Index][Thread Index]
[tor-commits] [gettor/master] Updates to Twitter module, now it works. Still pending logging.
commit 9f2a5194c126f431579099185d5e1a12cbcec878
Author: ilv <ilv@xxxxxxxxxxxxxxxxxxxxxxxx>
Date: Fri Oct 30 22:11:52 2015 -0300
Updates to Twitter module, now it works. Still pending logging.
---
gettor/twitter.py | 507 ++++++++++++-----------------------------------------
process_tweets.py | 17 +-
2 files changed, 110 insertions(+), 414 deletions(-)
diff --git a/gettor/twitter.py b/gettor/twitter.py
index 05320ef..a197ac1 100644
--- a/gettor/twitter.py
+++ b/gettor/twitter.py
@@ -1,8 +1,8 @@
# -*- coding: utf-8 -*-
#
-# This file is part of GetTor, a Tor Browser distribution system.
+# This file is part of GetTor.
#
-# :authors: Israel Leiva <ilv@xxxxxxxxxx>
+# :authors: Israel Leiva <ilv@xxxxxxxxxxxxxx>
# Based on BridgeDB Twitter distributor (PoC) by wfn
# - https://github.com/wfn/twidibot
#
@@ -11,22 +11,18 @@
#
# :license: This is Free Software. See LICENSE for license information.
-import sys
-import json
-import time
-import signal
+import os
+import re
import tweepy
+import logging
import gettext
-
-from tweepy.models import Status
+import ConfigParser
import core
import utils
import blacklist
-"""Twitter module for processing requests. Forked from BridgeDB Twitter
-distributor by wfn (https://github.com/wfn/twidibot)"""
-
+"""Twitter channel for distributing links to download Tor Browser."""
class ConfigError(Exception):
pass
@@ -36,172 +32,24 @@ class InternalError(Exception):
pass
-class TwitterBotStreamListener(tweepy.StreamListener):
- """Listener for twitter's Streaming API."""
-
- def __init__(self, bot, api=None):
+class GetTorStreamListener(tweepy.StreamListener):
+ """ Basic listener for Twitter's streaming API."""
+ def __init__(self, bot):
self.bot = bot
- self.processing_data = False
-
- super(TwitterBotStreamListener, self).__init__(api)
-
- def on_data(self, raw_data):
- """Called when raw data is received from connection.
-
- This is where all the data comes first. Normally we could use
- (inherit) the on_data() in tweepy.StreamListener, but it unnecessarily
- and naively reports unknown event types as errors (to simple log);
- also, we might want to tweak it further later on.
-
- But for now, this is basically taken from tweepy's on_data().
-
- Return False to stop stream and close connection.
-
- """
-
- self.processing_data = True
-
- data = json.loads(raw_data)
-
- if 'in_reply_to_status_id' in data:
- status = Status.parse(self.api, data)
- if self.on_status(status) is False:
- return False
- elif 'delete' in data:
- delete = data['delete']['status']
- if self.on_delete(delete['id'], delete['user_id']) is False:
- return False
- elif 'event' in data:
- status = Status.parse(self.api, data)
- if self.on_event(status) is False:
- return False
- elif 'direct_message' in data:
- status = Status.parse(self.api, data)
- if self.on_direct_message(status) is False:
- return False
- elif 'limit' in data:
- if self.on_limit(data['limit']['track']) is False:
- return False
- elif 'disconnect' in data:
- if self.on_disconnect(data['disconnect']) is False:
- return False
- else:
- # we really are ok to receive unknown stream/event types.
- # log to debug?
- log.debug('TwitterBotStreamListener::on_data(): got event/stream'
- ' data of unknown type. Raw data follows:\n%s', data)
-
- self.processing_data = False
-
- def on_status(self, status):
- """Called when a new status arrives"""
-
- #log.debug('Got status: %s', status)
- return
-
- def on_event(self, status):
- """Called when a new event arrives"""
-
- #log.debug('Got event: %s', status)
-
- # XXX make sure tweepy's given 'status.event' unicode string can
- # always be safely converted to ascii
-
- # now it seems one can reply to dm without following the account
- # if str(status.event) == 'follow':
- # self.bot.handleFollowEvent(status)
-
- return
+ super(GetTorStreamListener, self).__init__(self.bot.api)
def on_direct_message(self, status):
- """Called when a new direct message arrives or is sent from us
-
- TODO: make a pull request for tweepy or something, because they
- say it's only when a direct message is *received* (implying, 'by us')
-
- """
-
- # doing twitter user comparisons using id_str makes sense here - it's
- # safe and id_str's are guaranteed to be unique (re: latter, just like
- # id's.) maybe consider deciding how comparisons should be made for sure,
- # and then extend tweepy.models.User to include __eq__?
+ """ Right now we only care about direct messages. """
if status.direct_message['sender']['id_str'] != self.bot.bot_info.id_str:
- self.bot.handleDirectMessage(status)
- else:
- # log.debug('Caught a direct message sent *from* us')
- pass
-
- return
-
- def on_connect(self):
- """Called once connected to streaming server.
-
- This will be invoked once a successful response
- is received from the server. Allows the listener
- to perform some work prior to entering the read loop.
-
- """
- pass
-
- def on_exception(self, exception):
- """Called when an unhandled exception occurs."""
- return
-
- def on_delete(self, status_id, user_id):
- """Called when a delete notice arrives for a status"""
- return
-
- def on_limit(self, track):
- """Called when a limitation notice arrvies"""
- return
-
- def on_error(self, status_code):
- """Called when a non-200 status code is returned"""
- return False
-
- def on_timeout(self):
- """Called when stream connection times out"""
- return
-
- def on_disconnect(self, notice):
- """Called when twitter sends a disconnect notice
+ self.bot.parse_request(status.direct_message)
- Disconnect codes are listed here:
- https://dev.twitter.com/docs/streaming-apis/messages
- #Disconnect_messages_disconnect
-
- """
- return
-
class TwitterBot(object):
- """Main interface between the stateful listener and Twitter APIs."""
-
- # TODO: think about secure ways of storing twitter access config.
- # For one, app itself should ideally not be able to have write access
- # to it. For another, ideally it would request details from some other
- # component, authenticate, and not be able to re-authenticate to twitter
-
- """
- default_access_config = {
- 'api_key': config.API_KEY,
- 'api_secret': config.API_SECRET,
- 'access_token': config.ACCESS_TOKEN,
- 'token_secret': config.TOKEN_SECRET
- }"""
-
- def __init__(self, **kw):
- """Constructor that accepts custom access config as named arguments
-
- Easy to test things from interactive shell this way.
- Probably won't be needed in production code.
+ """ Receive and reply requests via Twitter. """
+ def __init__(self, cfg=None):
+ """ Create new object by reading a configuration file.
- """
-
- """
- self.access_config = dict()
- for key, default in self.default_access_config.iteritems():
- self.access_config[key] = kw.get(key, default)
+ :param: cfg (string) the path of the configuration file.
"""
default_cfg = 'twitter.cfg'
@@ -222,11 +70,7 @@ class TwitterBot(object):
self.access_token = config.get('access_config', 'access_token')
self.token_secret = config.get('access_config', 'token_secret')
- self.async_streaming = config.get('api', 'async_streaming')
- self.char_limit = config.get('api', 'char_limit')
-
self.mirrors = config.get('general', 'mirrors')
- self.max_words = config.get('general', 'max_words')
self.i18ndir = config.get('i18n', 'dir')
logdir = config.get('log', 'dir')
@@ -235,8 +79,8 @@ class TwitterBot(object):
blacklist_cfg = config.get('blacklist', 'cfg')
self.bl = blacklist.Blacklist(blacklist_cfg)
- self.bl_max_req = config.get('blacklist', 'max_requests')
- self.bl_max_req = int(self.bl_max_req)
+ self.bl_max_request = config.get('blacklist', 'max_requests')
+ self.bl_max_request = int(self.bl_max_request)
self.bl_wait_time = config.get('blacklist', 'wait_time')
self.bl_wait_time = int(self.bl_wait_time)
@@ -251,13 +95,14 @@ class TwitterBot(object):
raise InternalError("Core error: %s" % str(e))
# logging
+ """
log = logging.getLogger(__name__)
logging_format = utils.get_logging_format()
date_format = utils.get_date_format()
formatter = logging.Formatter(logging_format, date_format)
- log.info('Redirecting SMTP logging to %s' % logfile)
+ log.info('Redirecting Twitter logging to %s' % logfile)
logfileh = logging.FileHandler(logfile, mode='a+')
logfileh.setFormatter(formatter)
logfileh.setLevel(logging.getLevelName(loglevel))
@@ -265,31 +110,29 @@ class TwitterBot(object):
# stop logging on stdout from now on
log.propagate = False
- self.log = log
-
- self.setSignalHandlers()
- self.msg = Messages()
-
-
- def _is_blacklisted(self, account):
+ #self.log = log"""
+
+ def _is_blacklisted(self, username):
"""Check if a user is blacklisted.
- :param: addr (string) the hashed address of the user.
+ :param: addr (string) the hashed username.
- :return: true is the address is blacklisted, false otherwise.
+ :return: true is the username is blacklisted, false otherwise.
"""
- anon_acc = utils.get_sha256(account)
+ hashed_username = utils.get_sha256(username)
try:
self.bl.is_blacklisted(
- anon_acc, 'Twitter', self.bl_max_req, self.bl_wait_time
+ hashed_username,
+ 'Twitter',
+ self.bl_max_request,
+ self.bl_wait_time
)
return False
except blacklist.BlacklistError as e:
return True
-
def _get_msg(self, msgid, lc):
"""Get message identified by msgid in a specific locale.
@@ -299,7 +142,7 @@ class TwitterBot(object):
Return: a string containing the given message.
"""
- self.log.debug("Getting message '%s' for locale %s" % (msgid, lc))
+ #self.log.debug("Getting message '%s' for locale %s" % (msgid, lc))
try:
t = gettext.translation(lc, self.i18ndir, languages=[lc])
_ = t.ugettext
@@ -308,11 +151,17 @@ class TwitterBot(object):
return msgstr
except IOError as e:
raise ConfigError("%s" % str(e))
-
-
- def _parse_request(self, message):
- """ """
- self.log.debug("Parsing text.")
+
+ def parse_text(self, msg):
+ """ Parse the text part of a message.
+
+ Split the message in words and look for patterns for locale,
+ operating system and mirrors requests.
+
+ :param: msg (string) the message received.
+
+ :return: request (list) 3-tuple with locale, os and type of request.
+ """
# core knows what OS are supported
supported_os = self.core.get_supported_os()
@@ -329,7 +178,8 @@ class TwitterBot(object):
found_mirrors = False
# analyze every word
- for word in message.split(' '):
+ words = re.split('\s+', msg.strip())
+ for word in words:
# look for lc and os
if not found_lc:
for lc in supported_lc:
@@ -352,154 +202,49 @@ class TwitterBot(object):
return req
+ def parse_request(self, dm):
+ """ Process the request received.
- def setSignalHandlers(self):
- """Set up relevant SIG* handlers for the bot.
+ Check if the user is not blacklisted and then check the body of
+ the message to find out what is asking.
- Note: if we want to handle some specific signal and not exit after
- catching it, we may need to store the original CPython handler
- (signified by signal.SIG_DFL), and restore it after handling the
- signal in question. For now, we'll only care about signals after
- which the program does exit.
+ :param: dm (status.direct_message) the direct message object received
+ via Twitter API.
"""
- # for now, we'll only handle SIGTERM. it might make sense to handle
- # SIGINT as well, though.
-
- signal.signal(signal.SIGTERM, self.handleSIGTERM)
- self.log.debug("SIGTERM handler is set.")
-
- def handleSIGTERM(self, sig_number, stack_frame):
- """Callback function called upon SIGTERM"""
-
- self.log.info("TwitterBot::handleSIGTERM(): caught SIGTERM signal.")
-
- self.log.info("Stopping bot listener.")
- self.listener.running = False
- while self.listener.processing_data:
- self.log.info(
- "Waiting for TwitterBotStreamListener to finish processing"
- " a data request/package"
- )
- time.sleep(0.5)
-
- self.log.info("Closing down storage controller.")
- self.storage_controller.closeAll()
-
- self.log.info("Exiting program.")
- sys.exit(0)
-
- def authenticate(self, auth=None):
- """Authenticate to Twitter API, get API handle, and remember it."""
-
- if auth:
- self.auth = auth
- else:
- self.auth = tweepy.OAuthHandler(
- self.api_key,
- self.api_secret
- )
-
- self.auth.set_access_token(
- self.access_token,
- self.token_secret
- )
-
- try:
- self.api = tweepy.API(self.auth)
- except Exception as e:
- self.log.fatal('Exception while authenticating to Twitter and '
- 'getting API handle: %s', e)
- self.api = None
- finally:
- # del self.auth # ideally we'd be able to delete this, but
- # presently - no; anything?
- pass
-
- if self.api:
- self.log.info('Authenticated and got the RESTful API handle')
- self.bot_info = self.api.me()
- #api.update_status('hello world!')
-
- def subscribeToStreams(self):
- """Subscribe to relevant streams in the Streaming API."""
-
- self.listener = TwitterBotStreamListener(
- bot=self,
- api=self.api
- )
-
- self.stream = tweepy.Stream(self.auth, self.listener)
-
- # user stream gives us direct messages and follow events
- self.stream.userstream(async=self.async_streaming)
- # stream.filter may be useful, but we don't need it for now
-
- # the following will not be executed if we're not going async -
- # userstream() blocks, its event handler loop takes over:
- self.log.info('Subscribed to relevant streams via Streaming API')
-
- def handleFollowEvent(self, event):
- """
- user_id = event.source['id'] # 'id' is unique big int
-
- if user_id != self.bot_info.id:
- user = self.api.get_user(id=user_id)
- user.follow()
-
- if config.RESPOND_AFTER_FOLLOW:
- # the following line *blocks* the thread that we care about.
- # we should not do this, ever. as long as we're just testing
- # with a few cat accounts, it's ok.
-
- # TODO: use sched.scheduler, or threading.Timer (or sth)
- time.sleep(config.WAIT_TIME_AFTER_FOLLOW)
-
- # for now, english by default
- self.sendMessage(
- user_id,
- get_msg('welcome', 'en')
- )
- """
- # it seems that we don't need to be followed to send dm
- pass
-
-
- def handleDirectMessage(self, status):
- """ Handle direct messages received (i.e. parse request). """
- sender_id = status.direct_message['sender_id']
- message = status.direct_message['text'].strip().lower()
-
- self.log.debug("Parsing request")
+ sender_id = dm['sender']['id_str']
+ msg = dm['text'].strip().lower()
+ bogus_request = False
+ request = None
+ status = ''
try:
if self._is_blacklisted(str(sender_id)):
- self.log.info("Request from blacklisted account!")
+ #self.log.info("Request from blacklisted account!")
status = 'blacklisted'
bogus_request = True
- # first let's find out how many words are in the message
- # request shouldn't be longer than 3 words, but just in case
- words = message.split(' ')
- if len(words) > self.max_words:
- bogus_request = True
- self.log.info("Message way too long")
- status = 'error'
- reply = self._get_msg('message_error', 'en')
-
if not bogus_request:
- self.log.debug("Request seems legit, let's parse it")
+ #self.log.debug("Request seems legit, let's parse it")
# let's try to guess what the user is asking
- req = self._parse_text(str(msg))
-
- if req['type'] == 'help':
- self.log.debug("Type of request: help")
- status = 'success'
- reply = self._get_msg('help', 'en')
+ request = self.parse_text(str(msg))
+
+ # possible options: links, mirrors, help
+ if request['type'] == 'links':
+ #self.log.debug("Type of request: help")
+ links = self.core.get_links(
+ 'Twitter',
+ request['os'],
+ request['lc']
+ )
+
+ reply = self._get_msg('links', 'en')
+ reply = reply % (request['os'], request['lc'], links)
+
- elif req['type'] == 'mirrors':
- self.log.debug("Type of request: mirrors")
+ elif request['type'] == 'mirrors':
+ #self.log.debug("Type of request: mirrors")
status = 'success'
reply = self._get_msg('mirrors', 'en')
try:
@@ -510,86 +255,50 @@ class TwitterBot(object):
except IOError as e:
reply = self._get_msg('mirrors_unavailable', 'en')
- elif req['type'] == 'links':
- self.log.debug("Type of request: help")
- links = self.core.get_links(
- "Twitter",
- req['os'],
- req['lc']
- )
- reply = self._get_msg('links', 'en')
- reply = reply % (req['os'], req['lc'], links)
- self.sendMessage(sender_id, reply)
-
+ else:
+ #self.log.debug("Type of request: help")
status = 'success'
-
- # send whatever the reply is
- self.sendMessage(sender_id, reply)
+ reply = self._get_msg('help', 'en')
+
+ self.api.send_direct_message(
+ user_id=sender_id,
+ text=reply
+ )
except (core.ConfigError, core.InternalError) as e:
# if core failes, send the user an error message, but keep going
- self.log.error("Something went wrong internally: %s" % str(e))
+ #self.log.error("Something went wrong internally: %s" % str(e))
status = 'core_error'
- reply = self._get_msg('internal_error', req['lc'])
+ reply = self._get_msg('internal_error', 'en')
finally:
# keep stats
- if req:
- self.log.debug("Adding request to database... ")
+ if request:
+ #self.log.debug("Adding request to database... ")
self.core.add_request_to_db()
- def sendMessage(self, target_id, message):
- # this is quick and ugly. primary splits (if needed) at newlines.
- """
- try:
- cur_message = ''
- for line in message.split('\n'):
- if len(cur_message + ('\n' if cur_message else '') + line)\
- > config.CHARACTER_LIMIT:
- self._split_in_chunks_and_send(target_id, cur_message)
- cur_message = ''
- else:
- cur_message += ('\n' if cur_message else '') + line
- if cur_message:
- self._split_in_chunks_and_send(target_id, cur_message)
- except Exception as e:
- # again, scrubbing 'target_id' should be an option, etc.
- log.warning('Failed to send a direct message to %s. Exception:\n%s',
- str(target_id), e)
- return False
- return True
+ def start(self):
+ """ Start the bot for handling requests.
+
+ Start a new Twitter bot.
"""
- # with new twitter limit of direct messages, we can send messages
- # without any trouble
- self.api.send_direct_message(
- user_id=target_id,
- text=message
+ self.auth = tweepy.OAuthHandler(
+ self.api_key,
+ self.api_secret
+ )
+
+ self.auth.set_access_token(
+ self.access_token,
+ self.token_secret
)
- """
- def _split_in_chunks_and_send(self, target_id, message):
- # assume any decent humane splitting has been done beforehand.
- # we have to do with what we have here.
- # exception handling at higher call stack.
-
- while message:
- self.api.send_direct_message(user_id=target_id,
- text=message[:config.CHARACTER_LIMIT])
- message = message[config.CHARACTER_LIMIT:]
- """
-
- """
- def followAllFollowers(self):
- # Start following everyone who is following us.
-
- for follower in tweepy.Cursor(self.api.followers).items():
- follower.follow()
- """
-
- """
- def unfollowAllFollowers(self):
- # Unfollow everyone who is following us.
-
- for follower in tweepy.Cursor(self.api.followers).items():
- follower.unfollow()
- """
+ self.api = tweepy.API(self.auth)
+ self.bot_info = self.api.me()
+
+ stream = tweepy.Stream(
+ auth = self.api.auth,
+ listener=GetTorStreamListener(self)
+ )
+
+ stream.userstream()
+
diff --git a/process_tweets.py b/process_tweets.py
index 9e14f8b..484cb97 100644
--- a/process_tweets.py
+++ b/process_tweets.py
@@ -7,22 +7,9 @@ import logging
import gettor.twitter
def main():
- """Quick way of running the thing.
-
- Note that default right now is 'no async', which means, function won't
- return; on the other hand everything "will just work."
-
- If async is off, then we can:
- >>> from quick_run import quick_run
- >>> bot = quick_run() # authenticate, subscribe to streaming api, get handle
- """
-
try:
- bot = TwitterBot()
- bot.authenticate()
- # bot.api.update_status('hello world!')
- bot.subscribeToStreams()
- return bot
+ bot = gettor.twitter.TwitterBot()
+ bot.start()
except gettor.twitter.ConfigError as e:
print "Configuration error: %s" % str(e)
except gettor.twitter.InternalError as e:
_______________________________________________
tor-commits mailing list
tor-commits@xxxxxxxxxxxxxxxxxxxx
https://lists.torproject.org/cgi-bin/mailman/listinfo/tor-commits