-
Notifications
You must be signed in to change notification settings - Fork 59
/
Copy pathtwitter.py
1623 lines (1362 loc) · 58.2 KB
/
twitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# coding=utf-8
"""Twitter source class.
Uses the v1.1 REST API: https://dev.twitter.com/docs/api
TODO: collections for twitter accounts; use as activity target?
The Audience Targeting 'to' field is set to @public or @private based on whether
the tweet author's 'protected' field is true or false.
https://dev.twitter.com/docs/platform-objects/users
"""
from __future__ import absolute_import, unicode_literals
from future import standard_library
standard_library.install_aliases()
from future.moves.urllib import error as urllib_error
from builtins import range, str, zip
import collections
import datetime
import http.client
import itertools
import logging
import mimetypes
import re
import socket
import urllib.parse, urllib.request
from . import appengine_config
from bs4 import BeautifulSoup
import brevity
from . import source
from oauth_dropins import twitter_auth
from oauth_dropins.webutil import util
# Twitter v1.1 API endpoints. Relative paths are resolved against API_BASE;
# the media upload endpoints are absolute because they live on a separate host.
API_BASE = 'https://api.twitter.com/1.1/'
API_BLOCK_IDS = 'blocks/ids.json?count=5000&stringify_ids=true&cursor=%s'
API_BLOCKS = 'blocks/list.json?skip_status=true&count=5000&cursor=%s'
API_CURRENT_USER = 'account/verify_credentials.json'
API_DELETE_TWEET = 'statuses/destroy.json'
API_DELETE_FAVORITE = 'favorites/destroy.json'
API_FAVORITES = 'favorites/list.json?screen_name=%s&include_entities=true&tweet_mode=extended'
API_LIST_TIMELINE = 'lists/statuses.json?include_entities=true&tweet_mode=extended&count=%(count)d&slug=%(slug)s&owner_screen_name=%(owner_screen_name)s'
API_LOOKUP = 'statuses/lookup.json?id=%s&include_entities=true&tweet_mode=extended'
API_POST_FAVORITE = 'favorites/create.json'
API_POST_MEDIA = 'statuses/update_with_media.json'
API_POST_RETWEET = 'statuses/retweet/%s.json'
API_POST_TWEET = 'statuses/update.json'
API_RETWEETS = 'statuses/retweets.json?id=%s&tweet_mode=extended'
API_SEARCH = 'search/tweets.json?q=%(q)s&include_entities=true&tweet_mode=extended&result_type=recent&count=%(count)d'
API_STATUS = 'statuses/show.json?id=%s&include_entities=true&tweet_mode=extended'
API_TIMELINE = 'statuses/home_timeline.json?include_entities=true&tweet_mode=extended&count=%d'
API_UPLOAD_MEDIA = 'https://upload.twitter.com/1.1/media/upload.json'
API_MEDIA_METADATA = 'https://upload.twitter.com/1.1/media/metadata/create.json'
API_USER = 'users/show.json?screen_name=%s'
API_USER_TIMELINE = 'statuses/user_timeline.json?include_entities=true&tweet_mode=extended&count=%(count)d&screen_name=%(screen_name)s'
# Not part of the REST API: scraped twitter.com HTML popup listing a tweet's likes.
HTML_FAVORITES = 'https://twitter.com/i/activity/favorited_popup?id=%s'

# Matches canonical tweet permalink URLs (used to approve canonicalized URLs).
TWEET_URL_RE = re.compile(r'https://twitter\.com/[^/?]+/status(es)?/[^/?]+$')
# HTTP status codes Twitter uses to signal rate limiting.
HTTP_RATE_LIMIT_CODES = (429, 503)

# Don't hit the RETWEETS endpoint more than this many times per
# get_activities() call.
# https://dev.twitter.com/docs/rate-limiting/1.1/limits
# TODO: sigh. figure out a better way. dammit twitter, give me a batch API!!!
RETWEET_LIMIT = 15

# Number of IDs to search for at a time
QUOTE_SEARCH_BATCH_SIZE = 20

# For read requests only.
RETRIES = 3

# Config constants, as of 2017-11-08:
# * Current max tweet length and expected length of a t.co URL.
#   https://twittercommunity.com/t/updating-the-character-limit-and-the-twitter-text-library/96425
#   https://dev.twitter.com/docs/tco-link-wrapper/faq
# * Max media per tweet.
#   https://dev.twitter.com/rest/reference/post/statuses/update#api-param-media_ids
# * Allowed image formats:
#   https://dev.twitter.com/rest/media/uploading-media#imagerecs
# * Allowed video formats, max video size, and upload chunk size:
#   https://dev.twitter.com/rest/public/uploading-media#keepinmind
# * Max alt text length.
#   https://developer.twitter.com/en/docs/media/upload-media/api-reference/opst-media-metadata-create
#
# Update by running help/configuration.json manually in
# https://apigee.com/embed/console/twitter
#
# TODO: pull these from /help/configuration.json instead (except max tweet length)
# https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration
MAX_TWEET_LENGTH = 280
TCO_LENGTH = 23
MAX_MEDIA = 4
IMAGE_MIME_TYPES = frozenset(('image/jpg', 'image/jpeg', 'image/png',
                              'image/gif', 'image/webp',))
VIDEO_MIME_TYPES = frozenset(('video/mp4',))
MB = 1024 * 1024
MAX_VIDEO_SIZE = 15 * MB
UPLOAD_CHUNK_SIZE = 5 * MB
MAX_ALT_LENGTH = 420

# username requirements and limits:
# https://support.twitter.com/articles/101299#error
# http://stackoverflow.com/a/13396934/186123
MENTION_RE = re.compile(r'(^|[^\w@/\!?=&])@(\w{1,15})\b', re.UNICODE)

# hashtag requirements and limits:
# https://support.twitter.com/articles/370610
# http://stackoverflow.com/questions/8451846
HASHTAG_RE = re.compile(r'(^|\s)[##](\w+)\b', re.UNICODE)
class OffsetTzinfo(datetime.tzinfo):
  """Fixed-offset time zone. Never observes daylight saving time."""

  def __init__(self, utc_offset=0):
    """Constructor.

    Args:
      utc_offset: offset of this time zone from UTC, in seconds
    """
    self._offset = datetime.timedelta(seconds=utc_offset)

  def utcoffset(self, dt):
    """Returns the fixed offset from UTC as a timedelta."""
    return self._offset

  def dst(self, dt):
    """Returns a zero timedelta; this zone has no DST adjustment."""
    return datetime.timedelta(0)
class Twitter(source.Source):
  """Twitter source class. See file docstring and Source class for details."""

  DOMAIN = 'twitter.com'
  BASE_URL = 'https://twitter.com/'
  NAME = 'Twitter'
  FRONT_PAGE_TEMPLATE = 'templates/twitter_index.html'
  # Tweet ids are purely numeric.
  POST_ID_RE = re.compile('^[0-9]+$')
  # HTML snippet for embedding a tweet.
  # https://dev.twitter.com/docs/embedded-tweets
  EMBED_POST = """
<script async defer src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
<br />
<blockquote class="twitter-tweet" lang="en" data-dnt="true">
<p>%(content)s
<a href="%(url)s">#</a></p>
</blockquote>
"""
  # Canonicalizes twitter.com URLs: accepts tweet permalinks, rejects the
  # protected-account redirect interstitial.
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=DOMAIN,
    approve=TWEET_URL_RE,
    reject=r'https://twitter\.com/.+\?protected_redirect=true')
def __init__(self, access_token_key, access_token_secret, username=None):
"""Constructor.
Twitter now requires authentication in v1.1 of their API. You can get an
OAuth access token by creating an app here: https://dev.twitter.com/apps/new
Args:
access_token_key: string, OAuth access token key
access_token_secret: string, OAuth access token secret
username: string, optional, the current user. Used in e.g. preview/create.
"""
self.access_token_key = access_token_key
self.access_token_secret = access_token_secret
self.username = username
def get_actor(self, screen_name=None):
"""Returns a user as a JSON ActivityStreams actor dict.
Args:
screen_name: string username. Defaults to the current user.
"""
if screen_name is None:
url = API_CURRENT_USER
else:
url = API_USER % screen_name
return self.user_to_actor(self.urlopen(url))
  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None, **kwargs):
    """Fetches posts and converts them to ActivityStreams activities.

    XXX HACK: this is currently hacked for bridgy to NOT pass min_id to the
    request for fetching activity tweets themselves, but to pass it to all of
    the requests for filling in replies, retweets, etc. That's because we want
    to find new replies and retweets of older initial tweets.
    TODO: find a better way.

    See :meth:`source.Source.get_activities_response()` for details. app_id is
    ignored. min_id is translated to Twitter's since_id.

    The code for handling ETags (and 304 Not Changed responses and setting
    If-None-Match) is here, but unused right now since Twitter evidently doesn't
    support ETags. From https://dev.twitter.com/discussions/5800 :
    "I've confirmed with our team that we're not explicitly supporting this
    family of features."

    Likes (ie favorites) are scraped from twitter.com HTML, since Twitter's REST
    API doesn't offer a way to fetch them. You can also get them from the
    Streaming API, though, and convert them with streaming_event_to_object().
    https://dev.twitter.com/docs/streaming-apis/messages#Events_event

    Shares (ie retweets) are fetched with a separate API call per tweet:
    https://dev.twitter.com/docs/api/1.1/get/statuses/retweets/%3Aid
    However, retweets are only fetched for the first 15 tweets that have them,
    since that's Twitter's rate limit per 15 minute window. :(
    https://dev.twitter.com/docs/rate-limiting/1.1/limits

    Quote tweets are fetched by searching for the possibly quoted tweet's ID,
    using the OR operator to search up to 5 IDs at a time, and then checking
    the quoted_status_id_str field
    https://dev.twitter.com/overview/api/tweets#quoted_status_id_str

    Use the group_id @self to retrieve a user_id's timeline. If user_id is None
    or @me, it will return tweets for the current API user.

    group_id can be used to specify the slug of a list for which to return tweets.
    By default the current API user's lists will be used, but lists owned by other
    users can be fetched by explicitly passing a username to user_id, e.g. to
    fetch tweets from the list @exampleuser/example-list you would call
    get_activities(user_id='exampleuser', group_id='example-list').

    Twitter replies default to including a mention of the user they're replying
    to, which overloads mentions a bit. When fetch_mentions is True, we determine
    that a tweet mentions the current user if it @-mentions their username and:

    * it's not a reply, OR
    * it's a reply, but not to the current user, AND
      * the tweet it's replying to doesn't @-mention the current user
    """
    if group_id is None:
      group_id = source.FRIENDS

    # whether or not the user explicitly passed an @-prefixed id, strip it
    if user_id and user_id.startswith('@'):
      user_id = user_id[1:]

    # nested function for lazily fetching the user object if we need it.
    # (one-element list works around py2's lack of nonlocal.)
    user = []
    def _user():
      if not user:
        user.append(self.urlopen(API_USER % user_id if user_id else API_CURRENT_USER))
      return user[0]

    if count:
      count += start_index

    activities = []
    if activity_id:
      self._validate_id(activity_id)
      tweets = [self.urlopen(API_STATUS % activity_id)]
      total_count = len(tweets)
    else:
      if group_id == source.SELF:
        if user_id in (None, source.ME):
          user_id = ''
        url = API_USER_TIMELINE % {
          'count': count,
          'screen_name': user_id,
        }

        if fetch_likes:
          liked = self.urlopen(API_FAVORITES % user_id)
          if liked:
            activities += [self._make_like(tweet, _user()) for tweet in liked]
      elif group_id == source.SEARCH:
        if not search_query:
          raise ValueError('search requires search_query parameter')
        url = API_SEARCH % {
          'q': urllib.parse.quote_plus(search_query.encode('utf-8')),
          'count': count,
        }
      elif group_id in (source.FRIENDS, source.ALL):
        url = API_TIMELINE % (count)
      else:
        # any other group id is interpreted as a list slug
        if not user_id:
          user_id = _user().get('screen_name')
        url = API_LIST_TIMELINE % {
          'count': count,
          'slug': group_id,
          'owner_screen_name': user_id,
        }

      headers = {'If-None-Match': etag} if etag else {}
      total_count = None
      try:
        resp = self.urlopen(url, headers=headers, parse_response=False)
        etag = resp.info().get('ETag')
        tweet_obj = source.load_json(resp.read(), url)
        # search responses nest the tweets under a 'statuses' key
        if group_id == source.SEARCH:
          tweet_obj = tweet_obj.get('statuses', [])
        tweets = tweet_obj[start_index:]
      except urllib_error.HTTPError as e:
        if e.code == 304:  # Not Modified, from a matching ETag
          tweets = []
        else:
          raise

    # batch get memcached counts of favorites and retweets for all tweets.
    # cache keys are 'ATR <id>' (retweet count) and 'ATF <id>' (favorite count).
    cached = {}
    if cache is not None:
      keys = itertools.product(('ATR', 'ATF'), [t['id_str'] for t in tweets])
      cached = cache.get_multi('%s %s' % (prefix, id) for prefix, id in keys)
    # only update the cache at the end, in case we hit an error before then
    cache_updates = {}

    if fetch_shares:
      retweet_calls = 0
      for tweet in tweets:
        # don't fetch retweets if the tweet is itself a retweet or if the
        # author's account is protected. /statuses/retweets 403s with error
        # code 200 (?!) for protected accounts.
        # https://github.com/snarfed/bridgy/issues/688
        if tweet.get('retweeted') or tweet.get('user', {}).get('protected'):
          continue
        elif retweet_calls >= RETWEET_LIMIT:
          logging.warning("Hit Twitter's retweet rate limit (%d) with more to "
                          "fetch! Results will be incomplete!" % RETWEET_LIMIT)
          break

        # store retweets in the 'retweets' field, which is handled by
        # tweet_to_activity().
        # TODO: make these HTTP requests asynchronous. not easy since we don't
        # (yet) require threading support or use a non-blocking HTTP library.
        #
        # twitter limits this API endpoint to one call per minute per user,
        # which is easy to hit, so we stop before we hit that.
        # https://dev.twitter.com/docs/rate-limiting/1.1/limits
        #
        # can't use the statuses/retweets_of_me endpoint because it only
        # returns the original tweets, not the retweets or their authors.
        id = tweet['id_str']
        count = tweet.get('retweet_count')
        # skip the API call when the count hasn't changed since we last cached it
        if count and count != cached.get('ATR ' + id):
          url = API_RETWEETS % id
          if min_id is not None:
            url = util.add_query_params(url, {'since_id': min_id})
          try:
            tweet['retweets'] = self.urlopen(url)
          except urllib_error.URLError as e:
            code, _ = util.interpret_http_exception(e)
            if code != '404':  # 404 means the original tweet was deleted
              raise
          retweet_calls += 1
          cache_updates['ATR ' + id] = count

    tweet_activities = [self.tweet_to_activity(t) for t in tweets]

    if fetch_replies:
      self.fetch_replies(tweet_activities, min_id=min_id)

    if fetch_mentions:
      # fetch mentions *after* replies so that we don't get replies to mentions
      # https://github.com/snarfed/bridgy/issues/631
      mentions = self.fetch_mentions(_user().get('screen_name'), tweets,
                                     min_id=min_id)
      tweet_activities += [self.tweet_to_activity(m) for m in mentions]

    if fetch_likes:
      for tweet, activity in zip(tweets, tweet_activities):
        id = tweet['id_str']
        count = tweet.get('favorite_count')
        # likes are only scraped for public tweets whose count changed
        if self.is_public(activity) and count and count != cached.get('ATF ' + id):
          url = HTML_FAVORITES % id
          try:
            resp = util.urlopen(url).read()
            html = source.load_json(resp, url).get('htmlUsers', '')
          except urllib_error.URLError as e:
            util.interpret_http_exception(e)  # just log it
            continue
          likes = self.favorites_html_to_likes(tweet, html)
          activity['object'].setdefault('tags', []).extend(likes)
          cache_updates['ATF ' + id] = count

    activities += tweet_activities

    response = self.make_activities_base_response(activities)
    response.update({'total_count': total_count, 'etag': etag})
    if cache_updates and cache is not None:
      cache.set_multi(cache_updates)
    return response
  def fetch_replies(self, activities, min_id=None):
    """Fetches and injects Twitter replies into a list of activities, in place.

    Includes indirect replies ie reply chains, not just direct replies. Searches
    for @-mentions, matches them to the original tweets with
    in_reply_to_status_id_str, and recurses until it's walked the entire tree.

    Args:
      activities: list of activity dicts
      min_id: string or int, optional; only search for replies newer than this id

    Returns:
      same activities list
    """
    # cache searches for @-mentions for individual users. maps username to dict
    # mapping tweet id to ActivityStreams reply object dict.
    mentions = {}

    # find replies
    for activity in activities:
      # list of ActivityStreams reply object dict and set of seen activity ids
      # (tag URIs). seed with the original tweet; we'll filter it out later.
      replies = [activity]
      _, id = util.parse_tag_uri(activity['id'])
      seen_ids = set([id])

      for reply in replies:
        # get mentions of this tweet's author so we can search them for replies to
        # this tweet. can't use statuses/mentions_timeline because i'd need to
        # auth as the user being mentioned.
        # https://dev.twitter.com/docs/api/1.1/get/statuses/mentions_timeline
        #
        # note that these HTTP requests are synchronous. you can make async
        # requests by using urlfetch.fetch() directly, but not with urllib2.
        # https://developers.google.com/appengine/docs/python/urlfetch/asynchronousrequests
        author = reply['actor']['username']
        if author not in mentions:
          url = API_SEARCH % {
            'q': urllib.parse.quote_plus('@' + author),
            'count': 100,
          }
          if min_id is not None:
            url = util.add_query_params(url, {'since_id': min_id})
          mentions[author] = self.urlopen(url)['statuses']

        # look for replies. add any we find to the end of replies. this makes us
        # recursively follow reply chains to their end. (python supports
        # appending to a sequence while you're iterating over it.)
        for mention in mentions[author]:
          id = mention['id_str']
          if (mention.get('in_reply_to_status_id_str') in seen_ids and
              id not in seen_ids):
            replies.append(self.tweet_to_activity(mention))
            seen_ids.add(id)

      items = [r['object'] for r in replies[1:]]  # filter out seed activity
      activity['object']['replies'] = {
        'items': items,
        'totalItems': len(items),
      }
  def fetch_mentions(self, username, tweets, min_id=None):
    """Fetches a user's @-mentions and returns them as ActivityStreams.

    Tries to only include explicit mentions, not mentions automatically created
    by @-replying. See the :meth:`get_activities()` docstring for details.

    Args:
      username: string
      tweets: list of Twitter API objects. used to find quote tweets quoting them.
      min_id: only return activities with ids greater than this

    Returns:
      list of activity dicts
    """
    # get @-name mentions
    url = API_SEARCH % {
      'q': urllib.parse.quote_plus('@' + username),
      'count': 100,
    }
    if min_id is not None:
      url = util.add_query_params(url, {'since_id': min_id})
    candidates = self.urlopen(url)['statuses']

    # fetch in-reply-to tweets (if any), keyed by id, so we can tell whether
    # a candidate's @-mention was inherited from the tweet it replies to
    in_reply_to_ids = util.trim_nulls(
      [c.get('in_reply_to_status_id_str') for c in candidates])
    origs = {
      o.get('id_str'): o for o in
      self.urlopen(API_LOOKUP % ','.join(in_reply_to_ids))
    } if in_reply_to_ids else {}

    # filter out tweets that we don't consider mentions
    mentions = []
    for c in candidates:
      # skip the user's own tweets and retweets
      if (c.get('user', {}).get('screen_name') == username or
          c.get('retweeted_status')):
        continue
      reply_to = origs.get(c.get('in_reply_to_status_id_str'))
      if not reply_to:
        # not a reply: an @-mention here is explicit
        mentions.append(c)
      else:
        # a reply only counts as a mention if it's neither to this user nor
        # to a tweet that already @-mentions them (auto-populated mentions)
        reply_to_user = reply_to.get('user', {}).get('screen_name')
        mentioned = [u.get('screen_name') for u in
                     reply_to.get('entities', {}).get('user_mentions', [])]
        if username != reply_to_user and username not in mentioned:
          mentions.append(c)

    # search for quote tweets
    # Guideline ("Limit your searches to 10 keywords and operators.")
    # implies fewer, but 20 IDs seems to work in practice.
    # https://dev.twitter.com/rest/public/search
    for batch in [
        tweets[i:i + QUOTE_SEARCH_BATCH_SIZE]
        for i in range(0, len(tweets), QUOTE_SEARCH_BATCH_SIZE)
    ]:
      batch_ids = [t['id_str'] for t in batch]
      url = API_SEARCH % {
        'q': urllib.parse.quote_plus(' OR '.join(batch_ids)),
        'count': 100,
      }
      if min_id is not None:
        url = util.add_query_params(url, {'since_id': min_id})
      candidates = self.urlopen(url)['statuses']
      for c in candidates:
        quoted_status_id = c.get('quoted_status_id_str')
        # only count actual quotes of the batch's tweets, not retweets of them
        if (quoted_status_id and quoted_status_id in batch_ids and
            not c.get('retweeted_status')):
          mentions.append(c)

    return mentions
def get_comment(self, comment_id, activity_id=None, activity_author_id=None,
activity=None):
"""Returns an ActivityStreams comment object.
Args:
comment_id: string comment id
activity_id: string activity id, optional
activity_author_id: string activity author id. Ignored.
activity: activity object, optional
"""
self._validate_id(comment_id)
url = API_STATUS % comment_id
return self.tweet_to_object(self.urlopen(url))
def get_share(self, activity_user_id, activity_id, share_id, activity=None):
"""Returns an ActivityStreams 'share' activity object.
Args:
activity_user_id: string id of the user who posted the original activity
activity_id: string activity id
share_id: string id of the share object
activity: activity object, optional
"""
self._validate_id(share_id)
url = API_STATUS % share_id
return self.retweet_to_object(self.urlopen(url))
def get_blocklist(self):
"""Returns the current user's block list.
May make multiple API calls, using cursors, to fully fetch large blocklists.
https://dev.twitter.com/overview/api/cursoring
Block lists may have up to 10k users, but each API call only returns 100 at
most, and the API endpoint is rate limited to 15 calls per user per 15m. So
if a user has >1500 users on their block list, we can't get the whole thing
at once. :(
Returns:
sequence of actor objects
Raises:
:class:`source.RateLimited` if we hit the rate limit. The partial
attribute will have the list of user ids we fetched before hitting the
limit.
"""
return self._get_blocklist_fn(API_BLOCKS,
lambda resp: (self.user_to_actor(user) for user in resp.get('users', [])))
def get_blocklist_ids(self):
"""Returns the current user's block list as a list of Twitter user ids.
May make multiple API calls, using cursors, to fully fetch large blocklists.
https://dev.twitter.com/overview/api/cursoring
Subject to the same rate limiting as get_blocklist(), but each API call
returns ~4k ids, so realistically this can actually fetch blocklists of up
to 75k users at once. Beware though, many Twitter users have even more!
Returns:
sequence of string Twitter user ids
Raises:
:class:`source.RateLimited` if we hit the rate limit. The partial
attribute will have the list of user ids we fetched before hitting the
limit.
"""
return self._get_blocklist_fn(API_BLOCK_IDS, lambda resp: resp.get('ids', []))
def _get_blocklist_fn(self, api_endpoint, response_fn):
values = []
cursor = '-1'
while cursor and cursor != '0':
try:
resp = self.urlopen(api_endpoint % cursor)
except urllib_error.HTTPError as e:
if e.code in HTTP_RATE_LIMIT_CODES:
raise source.RateLimited(str(e), partial=values)
raise
values.extend(response_fn(resp))
cursor = resp.get('next_cursor_str')
return values
def create(self, obj, include_link=source.OMIT_LINK,
ignore_formatting=False):
"""Creates a tweet, reply tweet, retweet, or favorite.
Args:
obj: ActivityStreams object
include_link: string
ignore_formatting: boolean
Returns:
a CreationResult whose content will be a dict with 'id', 'url',
and 'type' keys (all optional) for the newly created Twitter
object (or None)
"""
return self._create(obj, preview=False, include_link=include_link,
ignore_formatting=ignore_formatting)
def preview_create(self, obj, include_link=source.OMIT_LINK,
ignore_formatting=False):
"""Previews creating a tweet, reply tweet, retweet, or favorite.
Args:
obj: ActivityStreams object
include_link: string
ignore_formatting: boolean
Returns:
a CreationResult whose content will be a unicode string HTML
snippet (or None)
"""
return self._create(obj, preview=True, include_link=include_link,
ignore_formatting=ignore_formatting)
def _create(self, obj, preview=None, include_link=source.OMIT_LINK,
            ignore_formatting=False):
  """Creates or previews creating a tweet, reply tweet, retweet, or favorite.

  https://dev.twitter.com/docs/api/1.1/post/statuses/update
  https://dev.twitter.com/docs/api/1.1/post/statuses/retweet/:id
  https://dev.twitter.com/docs/api/1.1/post/favorites/create

  Shared implementation behind create() and preview_create(). Dispatches on
  the object's type/verb: like -> favorite, share -> retweet, note/article/
  reply/RSVP -> tweet (optionally with media, location, and quote tweet).

  Args:
    obj: ActivityStreams object
    preview: boolean, must be exactly True or False (asserted below)
    include_link: string
    ignore_formatting: boolean

  Returns:
    a CreationResult

    If preview is True, the content will be a unicode string HTML
    snippet. If False, it will be a dict with 'id' and 'url' keys
    for the newly created Twitter object.
  """
  assert preview in (False, True)
  type = obj.get('objectType')
  verb = obj.get('verb')

  # the existing tweet this object replies to, likes, retweets, or quotes
  base_obj = self.base_object(obj)
  base_id = base_obj.get('id')
  base_url = base_obj.get('url')

  is_reply = type == 'comment' or 'inReplyTo' in obj
  is_rsvp = (verb and verb.startswith('rsvp-')) or verb == 'invite'
  images = util.get_list(obj, 'image')
  video_url = util.get_first(obj, 'stream', {}).get('url')
  has_media = (images or video_url) and (type in ('note', 'article') or is_reply)
  lat = obj.get('location', {}).get('latitude')
  lng = obj.get('location', {}).get('longitude')

  # prefer displayName over content for articles
  # NOTE(review): this reassignment duplicates the identical one above;
  # harmless but redundant.
  type = obj.get('objectType')
  prefer_content = type == 'note' or (base_url and (type == 'comment'
                                                    or obj.get('inReplyTo')))

  # quote tweet: the first attachment whose URL is a tweet permalink
  preview_description = ''
  quote_tweet_url = None
  for att in obj.get('attachments', []):
    url = self.URL_CANONICALIZER(att.get('url', ''))
    if url and TWEET_URL_RE.match(url):
      quote_tweet_url = url
      preview_description += """\
<span class="verb">quote</span>
<a href="%s">this tweet</a>:<br>
%s
<br>and """ % (url, self.embed_post(att))
      break

  # strip the first video tag / quotation from the text since they're
  # handled separately (as uploaded media / appended quote tweet URL)
  content = self._content_for_create(
    obj, ignore_formatting=ignore_formatting, prefer_name=not prefer_content,
    strip_first_video_tag=bool(video_url), strip_quotations=bool(quote_tweet_url))

  if not content:
    if type == 'activity' and not is_rsvp:
      # fall back to the verb itself as the tweet text
      content = verb
    elif has_media:
      # media-only tweets may have empty text
      content = ''
    else:
      return source.creation_result(
        abort=False,  # keep looking for things to publish,
        error_plain='No content text found.',
        error_html='No content text found.')

  if is_reply and base_url:
    # Twitter *used* to require replies to include an @-mention of the
    # original tweet's author
    # https://dev.twitter.com/docs/api/1.1/post/statuses/update#api-param-in_reply_to_status_id
    # ...but now we use the auto_populate_reply_metadata query param instead:
    # https://dev.twitter.com/overview/api/upcoming-changes-to-tweets

    # the embed URL in the preview can't start with mobile. or www., so just
    # hard-code it to twitter.com. index #1 is netloc.
    parsed = urllib.parse.urlparse(base_url)
    parts = parsed.path.split('/')
    if len(parts) < 2 or not parts[1]:
      raise ValueError('Could not determine author of in-reply-to URL %s' % base_url)
    # drop a leading @-mention of the in-reply-to author, since Twitter now
    # auto-populates it via auto_populate_reply_metadata
    reply_to_prefix = '@%s ' % parts[1].lower()
    if content.lower().startswith(reply_to_prefix):
      content = content[len(reply_to_prefix):]

    parsed = list(parsed)
    parsed[1] = self.DOMAIN
    base_url = urllib.parse.urlunparse(parsed)

  # need a base_url with the tweet id for the embed HTML below. do this
  # *after* checking the real base_url for in-reply-to author username.
  if base_id and not base_url:
    base_url = 'https://twitter.com/-/statuses/' + base_id

  if is_reply and not base_url:
    return source.creation_result(
      abort=True,
      error_plain='Could not find a tweet to reply to.',
      error_html='Could not find a tweet to <a href="http://indiewebcamp.com/reply">reply to</a>. '
      'Check that your post has an <a href="http://indiewebcamp.com/comment">in-reply-to</a> '
      'link a Twitter URL or to an original post that publishes a '
      '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

  # truncate and ellipsize content if it's over the character
  # count. URLs will be t.co-wrapped, so include that when counting.
  content = self._truncate(content, obj.get('url'), include_link, type,
                           quote_tweet=quote_tweet_url)

  # linkify defaults to Twitter's link shortening behavior
  preview_content = util.linkify(content, pretty=True, skip_bare_cc_tlds=True)
  preview_content = MENTION_RE.sub(
    r'\1<a href="https://twitter.com/\2">@\2</a>', preview_content)
  preview_content = HASHTAG_RE.sub(
    r'\1<a href="https://twitter.com/hashtag/\2">#\2</a>', preview_content)

  if type == 'activity' and verb == 'like':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to like.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/favorite">favorite</a>. '
        'Check that your post has a like-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      preview_description += """\
<span class="verb">favorite</span>
<a href="%s">this tweet</a>:
%s""" % (base_url, self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      data = urllib.parse.urlencode({'id': base_id})
      # the favorite endpoint's response isn't used; synthesize our own
      self.urlopen(API_POST_FAVORITE, data=data)
      resp = {'type': 'like'}

  elif type == 'activity' and verb == 'share':
    if not base_url:
      return source.creation_result(
        abort=True,
        error_plain='Could not find a tweet to retweet.',
        error_html='Could not find a tweet to <a href="http://indiewebcamp.com/repost">retweet</a>. '
        'Check that your post has a repost-of link to a Twitter URL or to an original post that publishes a '
        '<a href="http://indiewebcamp.com/rel-syndication">rel-syndication</a> link to Twitter.')

    if preview:
      preview_description += """\
<span class="verb">retweet</span>
<a href="%s">this tweet</a>:
%s""" % (base_url, self.embed_post(base_obj))
      return source.creation_result(description=preview_description)
    else:
      data = urllib.parse.urlencode({'id': base_id})
      resp = self.urlopen(API_POST_RETWEET % base_id, data=data)
      resp['type'] = 'repost'

  elif type in ('note', 'article') or is_reply or is_rsvp:  # a tweet
    content = str(content).encode('utf-8')
    data = [('status', content)]

    if is_reply:
      preview_description += """\
<span class="verb">@-reply</span> to <a href="%s">this tweet</a>:
%s""" % (base_url, self.embed_post(base_obj))
      data.extend([
        ('in_reply_to_status_id', base_id),
        ('auto_populate_reply_metadata', 'true'),
      ])
    else:
      preview_description += '<span class="verb">tweet</span>:'

    if video_url:
      preview_content += ('<br /><br /><video controls src="%s"><a href="%s">'
                          'this video</a></video>' % (video_url, video_url))
      if not preview:
        # upload_video returns a CreationResult on failure, a media id on success
        ret = self.upload_video(video_url)
        if isinstance(ret, source.CreationResult):
          return ret
        data.append(('media_ids', ret))

    elif images:
      num = len(images)
      if num > MAX_MEDIA:
        images = images[:MAX_MEDIA]
        logging.warning('Found %d photos! Only using the first %d: %r',
                        num, MAX_MEDIA, images)
      preview_content += '<br /><br />' + ' '.join(
        '<img src="%s" alt="%s" />' % (img.get('url'), img.get('displayName', ''))
        for img in images)
      if not preview:
        # upload_images returns a CreationResult on failure, media ids on success
        ret = self.upload_images(images)
        if isinstance(ret, source.CreationResult):
          return ret
        data.append(('media_ids', ','.join(ret)))

    if lat and lng:
      preview_content += (
        '<div>at <a href="https://maps.google.com/maps?q=%s,%s">'
        '%s, %s</a></div>' % (lat, lng, lat, lng))
      data.extend([
        ('lat', lat),
        ('long', lng),
      ])

    if preview:
      return source.creation_result(content=preview_content,
                                    description=preview_description)
    else:
      # sorted() for deterministic parameter order
      resp = self.urlopen(API_POST_TWEET, data=urllib.parse.urlencode(sorted(data)))
      resp['type'] = 'comment' if is_reply else 'post'

  else:
    return source.creation_result(
      abort=False,
      error_plain='Cannot publish type=%s, verb=%s to Twitter' % (type, verb),
      error_html='Cannot publish type=%s, verb=%s to Twitter' % (type, verb))

  # normalize the API response into the {'id': ..., 'url': ...} dict we return
  id_str = resp.get('id_str')
  if id_str:
    resp.update({'id': id_str, 'url': self.tweet_url(resp)})
  elif 'url' not in resp:
    resp['url'] = base_url

  return source.creation_result(resp)
def _truncate(self, content, url, include_link, type, quote_tweet=None):
  """Shortens and ellipsizes tweet text to fit the character limit.

  Args:
    content: string
    url: string
    include_link: string
    type: string: 'article', 'note', etc.
    quote_tweet: string URL, optional. If provided,
      it will be appended to the content, *after* truncating.

  Return: string, the possibly shortened and ellipsized tweet text
  """
  fmt = (brevity.FORMAT_ARTICLE if type == 'article'
         else brevity.FORMAT_NOTE)

  # reserve room for the appended quote tweet URL: one t.co-wrapped
  # link plus a separating space
  length_budget = MAX_TWEET_LENGTH
  if quote_tweet:
    length_budget -= (TCO_LENGTH + 1)

  shortened = brevity.shorten(
    content,
    # permalink is included only when the text is truncated
    permalink=url if include_link != source.OMIT_LINK else None,
    # permashortlink is always included
    permashortlink=url if include_link == source.INCLUDE_LINK else None,
    target_length=length_budget, link_length=TCO_LENGTH, format=fmt)

  return shortened + ' ' + quote_tweet if quote_tweet else shortened
def upload_images(self, images):
  """Uploads one or more images from web URLs.

  https://dev.twitter.com/rest/reference/post/media/upload

  Note that files and JSON bodies in media POST API requests are *not*
  included in OAuth signatures.
  https://developer.twitter.com/en/docs/media/upload-media/uploading-media/media-best-practices

  Args:
    images: sequence of AS image objects, eg:
      [{'url': 'http://picture', 'displayName': 'a thing'}, ...]

  Returns:
    list of string media ids or :class:`CreationResult` on error
  """
  media_ids = []

  for img in images:
    img_url = img.get('url')
    if not img_url:
      continue

    # fetch the image and reject unsupported MIME types up front
    fetched = util.urlopen(img_url)
    bad_type = self._check_mime_type(img_url, fetched, IMAGE_MIME_TYPES,
                                     'JPG, PNG, GIF, and WEBP images')
    if bad_type:
      return bad_type

    # upload. the multipart file body isn't part of the OAuth signature.
    upload_headers = twitter_auth.auth_header(
      API_UPLOAD_MEDIA, self.access_token_key, self.access_token_secret, 'POST')
    upload_resp = util.requests_post(API_UPLOAD_MEDIA,
                                     files={'media': fetched},
                                     headers=upload_headers)
    upload_resp.raise_for_status()
    logging.info('Got: %s', upload_resp.text)
    media_id = source.load_json(upload_resp.text, API_UPLOAD_MEDIA)['media_id_string']
    media_ids.append(media_id)

    # attach alt text, if any, via the media metadata endpoint (best effort)
    alt = img.get('displayName')
    if alt:
      alt = util.ellipsize(alt, chars=MAX_ALT_LENGTH)
      metadata_headers = twitter_auth.auth_header(
        API_MEDIA_METADATA, self.access_token_key, self.access_token_secret, 'POST')
      metadata_resp = util.requests_post(
        API_MEDIA_METADATA,
        json={'media_id': media_id, 'alt_text': {'text': alt}},
        headers=metadata_headers)
      logging.info('Got: %s', metadata_resp)

  return media_ids
def upload_video(self, url):
"""Uploads a video from web URLs using the chunked upload process.
Chunked upload consists of multiple API calls:
* command=INIT, which allocates the media id
* command=APPEND for each 5MB block, up to 15MB total
* command=FINALIZE
https://dev.twitter.com/rest/reference/post/media/upload-chunked
https://dev.twitter.com/rest/public/uploading-media#chunkedupload
Args:
url: string URL of images
Returns:
string media id or :class:`CreationResult` on error
"""
video_resp = util.urlopen(url)
bad_type = self._check_mime_type(url, video_resp, VIDEO_MIME_TYPES, 'MP4 videos')
if bad_type:
return bad_type
length = video_resp.headers.get('Content-Length')
if not util.is_int(length):
msg = "Couldn't determine your video's size."
return source.creation_result(abort=True, error_plain=msg, error_html=msg)
length = int(length)
if int(length) > MAX_VIDEO_SIZE:
msg = "Your %sMB video is larger than Twitter's %dMB limit." % (
length // MB, MAX_VIDEO_SIZE // MB)
return source.creation_result(abort=True, error_plain=msg, error_html=msg)
# INIT
media_id = self.urlopen(API_UPLOAD_MEDIA, data=urllib.parse.urlencode({
'command': 'INIT',
'media_type': 'video/mp4',
'total_bytes': length,
}))['media_id_string']
# APPEND
headers = twitter_auth.auth_header(
API_UPLOAD_MEDIA, self.access_token_key, self.access_token_secret, 'POST')
i = 0
while True: