From 480d759b3736c73b8494f02422147e3a4a4a4c80 Mon Sep 17 00:00:00 2001 From: Linus2punkt0 Date: Mon, 8 Jan 2024 11:35:25 +0100 Subject: [PATCH] Small fix for external URL-handling --- crosspost.py | 1105 +++++++++++++++++++++++++------------------------- 1 file changed, 553 insertions(+), 552 deletions(-) diff --git a/crosspost.py b/crosspost.py index a164747..34e11fa 100644 --- a/crosspost.py +++ b/crosspost.py @@ -1,552 +1,553 @@ -from atproto import Client -import tweepy -from mastodon import Mastodon -from datetime import datetime, timedelta -from auth import * -from paths import * -import settings -import json, os, urllib.request, random, string, shutil, re - -date_in_format = '%Y-%m-%dT%H:%M:%S' - -# Setting up connections to bluesky, twitter and mastodon - -bsky = Client() -bsky.login(bsky_handle, bsky_password) -# After changes in twitters API we need to use tweepy.Client to make posts as it uses version 2.0 of the API. -# However, uploading images is still not included in 2.0, so for that we need to use tweepy.API, which uses -# the previous version. -if settings.Twitter: - twitter = tweepy.Client(consumer_key=TWITTER_APP_KEY, - consumer_secret=TWITTER_APP_SECRET, - access_token=TWITTER_ACCESS_TOKEN, - access_token_secret=TWITTER_ACCESS_TOKEN_SECRET) - - tweepy_auth = tweepy.OAuth1UserHandler(TWITTER_APP_KEY, TWITTER_APP_SECRET, TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET) - twitter_images = tweepy.API(tweepy_auth) - -if settings.Mastodon: - mastodon = Mastodon( - access_token = MASTODON_TOKEN, - api_base_url = MASTODON_INSTANCE - ) - -# Getting posts from bluesky - -def getPosts(): - writeLog("Gathering posts") - posts = {} - # Getting feed of user - profile_feed = bsky.app.bsky.feed.get_author_feed({'actor': bsky_handle}) - for feed_view in profile_feed.feed: - if feed_view.post.author.handle != bsky_handle: - continue - # Post type "post" means it is not a quote post. 
- postType = "post" - # If post has an embed of type record it is a quote post, and should not be crossposted - cid = feed_view.post.cid - text = feed_view.post.record.text - # Sometimes bluesky shortens URLs and in that case we need to restore them before crossposting - if feed_view.post.record.facets: - text = restoreUrls(feed_view.post.record) - langs = feed_view.post.record.langs - timestamp = datetime.strptime(feed_view.post.indexed_at.split(".")[0], date_in_format) + timedelta(hours = 2) - # Setting replyToUser to the same as user handle and only changing it if the tweet is an actual reply. - # This way we can just check if the variable is the same as the user handle later and send through - # both tweets that are not replies, and posts that are part of a thread. - replyToUser = bsky_handle - replyTo = "" - # Checking if post is a quote post. Posts with references to feeds look like quote posts but aren't, and so will fail on missing attribute. - # Since quote posts can give values in two different ways it's a bit of a hassle to double check if it is an actual quote post, - # so instead I just try to run the function and if it fails I skip the post - # If there is some reason you would want to crosspost a post referencing a bluesky-feed that I'm not seeing, I might update this in the future. 
- if feed_view.post.embed and hasattr(feed_view.post.embed, "record"): - try: - replyToUser, replyTo = getQuotePost(feed_view.post.embed.record) - postType = "quote" - except: - writeLog("Post is of a type the crossposter can't parse.") - continue - # Checking if post is regular reply - elif feed_view.post.record.reply: - postType = "reply" - replyTo = feed_view.post.record.reply.parent.cid - # Poster will try to fetch reply to-username the "ordinary" way, - # and if it fails, it will try getting the entire thread and - # finding it that way - try: - replyToUser = feed_view.reply.parent.author.handle - except: - replyToUser = getReplyToUser(feed_view.post.record.reply.parent) - # If unable to fetch user that was replied to, code will skip this post. - if not replyToUser: - writeLog("Unable to find the user that this post replies to or quotes") - continue - # Checking if post is by user (i.e. not a repost), withing timelimit and either not a reply or a reply in a thread. - if timestamp > datetime.now() - timedelta(hours = settings.postTimeLimit) and replyToUser == bsky_handle: - # Fetching images if there are any in the post - imageData = "" - images = [] - if feed_view.post.embed and hasattr(feed_view.post.embed, "images"): - imageData = feed_view.post.embed.images - elif feed_view.post.embed and hasattr(feed_view.post.embed, "media") and postType == "quote": - imageData = feed_view.post.embed.media.images - elif feed_view.post.embed and hasattr(feed_view.post.embed, "external") and hasattr(feed_view.post.embed.external, "uri"): - text += '\n'+feed_view.post.embed.external.uri - - if imageData: - for image in imageData: - images.append({"url": image.fullsize, "alt": image.alt}) - postInfo = { - "text": text, - "replyTo": replyTo, - "images": images, - "type": postType, - "langs": langs - } - # Saving post to posts dictionary - posts[cid] = postInfo; - return posts - -# Function for getting username of person replied to. 
It can mostly be retrieved from the reply section of the tweet that has been fetched, -# but in cases where the original post in a thread has been deleted it causes some weirdness. Hopefully this resolves it. -def getReplyToUser(reply): - uri = reply.uri - username = "" - try: - response = bsky.app.bsky.feed.get_post_thread(params={"uri": uri}) - username = response.thread.post.author.handle - except: - writeLog("Unable to retrieve replyTo-user.") - return username - -# Function for getting included images. If no images are included, an empty list will be returned, -# and the posting functions will know not to include any images. -def getImages(images): - localImages = [] - for image in images: - # Getting alt text for image. If there is none this will be an empty string. - alt = image["alt"] - # Giving the image just a random filename - filename = ''.join(random.choice(string.ascii_lowercase) for i in range(10)) + ".jpg" - filename = imagePath + filename - # Downloading fullsize version of image - urllib.request.urlretrieve(image["url"], filename) - # Saving image info in a dictionary and adding it to the list. - imageInfo = { - "filename": filename, - "alt": alt - } - localImages.append(imageInfo) - return localImages - -# Function for restoring shortened URLS -def restoreUrls(record): - text = record.text - encodedText = text.encode("UTF-8") - for facet in record.facets: - if facet.features[0].py_type != "app.bsky.richtext.facet#link": - continue - url = facet.features[0].uri - # The index section designates where a URL starts end ends. Using this we can pick out the exact - # string representing the URL in the post, and replace it with the actual URL. 
- start = facet.index.byte_start - end = facet.index.byte_end - section = encodedText[start:end] - shortened = section.decode("UTF-8") - text = text.replace(shortened, url) - return text - -def getQuotePost(post): - if isinstance(post, dict): - user = post["record"]["author"]["handle"] - cid = post["record"]["cid"] - elif hasattr(post, "author"): - user = post.author.handle - cid = post.cid - else: - user = post.record.author.handle - cid = post.record.cid - return user, cid - -# Deprecated function -def imageFail(post): - if (post.embed and (hasattr(post.record.embed, "image") or hasattr(post.record.embed, "media")) - and not hasattr(post.embed, "images")): - return True - else: - return False - -def post(posts): - # The updates status is set to false until anything has been altered in the databse. If nothing has been posted in a run, we skip resaving the database. - updates = False - # Running through the posts dictionary reversed, to get oldest posts first. - for cid in reversed(list(posts.keys())): - # Checking if the post is already in the database, and in that case getting the IDs for the post - # on twitter and mastodon. If one or both of these IDs are empty, post will be sent. 
- tweetId = "" - tootId = "" - tFail = 0 - mFail = 0 - if cid in database: - tweetId = database[cid]["ids"]["twitterId"] - tootId = database[cid]["ids"]["mastodonId"] - tFail = database[cid]["failed"]["twitter"] - mFail = database[cid]["failed"]["mastodon"] - if mFail >= settings.maxRetries: - writeLog("Error limit reached, not posting to Mastodon") - if not tootId: - updates = True - tootId = "FailedToPost" - if tFail >= settings.maxRetries: - writeLog("Error limit reached, not posting to Twitter") - if not tweetId: - updates = True - tweetId = "FailedToPost" - text = posts[cid]["text"] - replyTo = posts[cid]["replyTo"] - images = posts[cid]["images"] - postType = posts[cid]["type"] - langs = posts[cid]["langs"] - tweetReply = "" - tootReply = "" - # If it is a reply, we get the IDs of the posts we want to reply to from the database. - # If post is not found in database, we can't continue the thread on mastodon and twitter, - # and so we skip it. - if replyTo in database: - tweetReply = database[replyTo]["ids"]["twitterId"] - tootReply = database[replyTo]["ids"]["mastodonId"] - elif replyTo and replyTo not in database: - writeLog("Post was a reply to a post that is not in the database.") - continue - # If either tweet or toot has not previously been posted, we download images (given the post includes images). - if images and (not tweetId or not tootId): - images = getImages(images) - # Trying to post to twitter and mastodon. If posting fails the post ID for each service is set to an - # empty string, letting the code know it should try again next time the code is run. - if not tweetId and tweetReply != "skipped" and tweetReply != "FailedToPost": - updates = True - try: - tweetId = tweet(text, tweetReply, images, postType, langToggle(langs, "twitter")) - except Exception as error: - writeLog(error) - tFail += 1 - tweetId = "" - else: - writeLog("Not posting to Twitter") - # Mastodon does not have a quote retweet function, so those will just be sent as replies. 
- if not tootId and tootReply != "skipped" and tootReply != "FailedToPost": - updates = True - try: - tootId = toot(text, tootReply, images, langToggle(langs, "mastodon")) - except Exception as error: - writeLog(error) - mFail += 1 - tootId = "" - else: - writeLog("Not posting to Mastodon") - # Saving post to database - jsonWrite(cid, tweetId, tootId, {"twitter": tFail, "mastodon": mFail}) - return updates - -# This function uses the language selection as a way to select which posts should be crossposted. -def langToggle(langs, service): - if service == "twitter": - langToggle = settings.twitterLang - elif service == "mastodon": - langToggle = settings.mastodonLang - else: - writeLog("Something has gone very wrong") - exit() - if not langToggle: - return True - if langs and langToggle in langs: - return (not settings.postDefault) - else: - return settings.postDefault - -# Function for posting tweets -def tweet(post, replyTo, images, postType, doPost): - if not settings.Twitter or not doPost: - return "skipped"; - mediaIds = [] - # If post includes images, images are uploaded so that they can be included in the tweet - if images: - mediaIds = [] - for image in images: - filename = image["filename"] - alt = image["alt"] - if len(alt) > 1000: - alt = alt[:996] + "..." - res = twitter_images.media_upload(filename) - id = res.media_id - # If alt text was added to the image on bluesky, it's also added to the image on twitter. - if alt: - writeLog("Uploading image " + filename + " with alt: " + alt + " to twitter") - twitter_images.create_media_metadata(id, alt) - mediaIds.append(id) - # Checking if the post is longer than 280 characters, and if so sending to the - # splitPost-function. - partTwo = "" - if postLength(post) > 280: - post, partTwo = splitPost(post) - # If the function does not return a post, splitting failed and we will skip this post. - if not post: - return "skipped" - # I wanted to make this part a little neater, but didn't get it to work and gave up. 
So here we are. - # If post is both reply and has images it is posted as both a reply and with images (duh), if it's - # a quote with images it's posted as that. If just either of the three it is posted as just that, - # and if neither it is just posted as a text post. - if replyTo and mediaIds and postType == "quote": - a = twitter.create_tweet(text=post, quote_tweet_id=replyTo, media_ids=mediaIds) - elif replyTo and mediaIds and postType == "reply": - a = twitter.create_tweet(text=post, in_reply_to_tweet_id=replyTo, media_ids=mediaIds) - elif postType == "quote": - a = twitter.create_tweet(text=post, quote_tweet_id=replyTo) - elif replyTo: - a = twitter.create_tweet(text=post, in_reply_to_tweet_id=replyTo) - elif mediaIds: - a = twitter.create_tweet(text=post, media_ids=mediaIds) - else: - a = twitter.create_tweet(text=post) - writeLog("Posted to twitter") - id = a[0]["id"] - if partTwo: - a = twitter.create_tweet(text=partTwo, in_reply_to_tweet_id=id) - id = a[0]["id"] - return id - -# More or less the exact same function as for tweeting, but for tooting. -def toot(post, replyTo, images, doPost): - if not settings.Mastodon or not doPost: - return "skipped"; - mediaIds = [] - # If post includes images, images are uploaded so that they can be included in the toot - if images: - for image in images: - filename = image["filename"] - alt = image["alt"] - # If alt text was added to the image on bluesky, it's also added to the image on mastodon, - # otherwise it will be uploaded without alt text. - if alt: - writeLog("Uploading image " + filename + " with alt: " + alt + " to mastodon") - res = mastodon.media_post(filename, description=alt) - else: - writeLog("Uploading image " + filename) - res = mastodon.media_post(filename) - mediaIds.append(res.id) - # Visibility is set to whatever is set in the settings file. If that is hybrid, it sets the visibility either to public or unlisted depending on - # if it is a reply in a thread or not. 
- visibility = settings.mastodonVisibility - if visibility == "hybrid" and replyTo: - visibility = "unlisted" - elif visibility == "hybrid": - visibility = "public" - # I wanted to make this part a little neater, but didn't get it to work and gave up. So here we are. - # If post is both reply and has images it is posted as both a reply and with images (duh). - # If just either of the two it is posted with just that, and if neither it is just posted as a text post. - if replyTo and mediaIds: - a = mastodon.status_post(post, in_reply_to_id=replyTo, media_ids=mediaIds, visibility=visibility) - elif replyTo: - a = mastodon.status_post(post, in_reply_to_id=replyTo, visibility=visibility) - elif mediaIds: - a = mastodon.status_post(post, media_ids=mediaIds, visibility=visibility) - else: - a = mastodon.status_post(post, visibility=visibility) - writeLog("Posted to mastodon") - id = a["id"] - return id - -# Function for correctly counting post length -def postLength(post): - # Twitter shortens urls to 23 characters - shortUrlLength = 23 - length = len(post) - # Finding all urls and calculating how much shorter the post will be after shortening - regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))" - urls = re.findall(regex, post) - for url in urls: - urlLength = len(url[0]) - if urlLength > shortUrlLength: - length = length - (urlLength - shortUrlLength) - return length - -# Function for splitting up posts that are too long for twitter. -def splitPost(text): - writeLog("Splitting post that is too long for twitter.") - first = text - # We first try to split the post into sentences, and send as many as can fit in the first one, - # and the rest in the second. - sentences = text.split(". ") - i = 1 - while len(first) > 280 and i < len(sentences): - first = ".".join(sentences[:(len(sentences) - i)]) + "." 
- second = ".".join(sentences[(len(sentences) - i):]) - i += 1 - # If splitting by sentance does not result in a short enough post, we try splitting by words instead. - if len(first) > 280: - first = text - words = text.split(" ") - i = 1 - while len(first) > 280 and i < len(words): - first = " ".join(words[:(len(words) - i)]) - second = " ".join(words[(len(words) - i):]) - i += 1 - # If splitting has ended up with either a first or second part that is too long, we return empty - # strings and the post is not sent to twitter. - if len(first) > 280 or len(second) > 280: - writeLog("Was not able to split post.") - first = "" - second = "" - return first, second - -# Function for writing new lines to the database -def jsonWrite(skeet, tweet, toot, failed): - ids = { - "twitterId": tweet, - "mastodonId": toot - } - data = { - "ids": ids, - "failed": failed - } - # When running, the code saves the database to memory, so instead of just saving the post to the database file, - # we also save it to the open database. This also overwrites the version of the post in memory in case - # an ID that was missing because of a previous failure. - database[skeet] = data - row = { - "skeet": skeet, - "ids": ids, - "failed": failed - } - jsonString = json.dumps(row) - # If the database file exists we want to append to it, otherwise we create it anew. - if os.path.exists(databasePath): - append_write = 'a' - else: - append_write = 'w' - # Skipping adding posts to db file if they are already in it. 
- if not isInDB(jsonString): - writeLog("Adding to database: " + jsonString) - file = open(databasePath, append_write) - file.write(jsonString + "\n") - file.close() - -# Function for reading database file and saving values in a dictionary -def jsonRead(): - database = {} - if not os.path.exists(databasePath): - return database - with open(databasePath, 'r') as file: - for line in file: - try: - jsonLine = json.loads(line) - except: - continue - skeet = jsonLine["skeet"] - ids = jsonLine["ids"] - failed = {"twitter": 0, "mastodon": 0} - if "failed" in jsonLine: - failed = jsonLine["failed"] - lineData = { - "ids": ids, - "failed": failed - } - database[skeet] = lineData - return database; - -# Function for checking if a line is already in the database-file -def isInDB(line): - if not os.path.exists(databasePath): - return False - with open(databasePath, 'r') as file: - content = file.read() - if line in content: - return True - else: - return False - -# Function for writing to the log file -def writeLog(message): - now = datetime.now().strftime("%d/%m/%Y %H:%M:%S") - date = datetime.now().strftime("%y%m%d") - message = str(now) + ": " + str(message) + "\n" - print(message) - if not settings.Logging: - return; - log = logPath + date + ".log" - if os.path.exists(log): - append_write = 'a' - else: - append_write = 'w' - dst = open(log, append_write) - dst.write(message) - dst.close() - -# Cleaning up downloaded images -def cleanup(): - writeLog("Deleting local images") - for filename in os.listdir(imagePath): - file_path = os.path.join(imagePath, filename) - try: - if os.path.isfile(file_path) or os.path.islink(file_path): - os.unlink(file_path) - elif os.path.isdir(file_path): - shutil.rmtree(file_path) - except Exception as e: - writeLog('Failed to delete %s. Reason: %s' % (file_path, e)) - -# Since we are working with a version of the database in memory, at the end of the run -# we completely overwrite the database on file with the one in memory. 
-# This does kind of make it uneccessary to write each new post to the file while running, -# but in case the program fails halfway through it gives us kind of a backup. -def saveDB(): - writeLog("Saving new database") - append_write = "w" - for skeet in database: - row = { - "skeet": skeet, - "ids": database[skeet]["ids"], - "failed": database[skeet]["failed"] - } - jsonString = json.dumps(row) - file = open(databasePath, append_write) - file.write(jsonString + "\n") - file.close() - append_write = "a" - -# Function for counting lines in a file -def countLines(file): - with open(file, 'r') as file: - for count, line in enumerate(file): - pass - return count - -# Every twelve hours a backup of the database is saved, in case something happens to the live database. -# If the live database contains fewer lines than the backup it means something has probably gone wrong, -# and before the live database is saved as a backup, the current backup is saved as a new file, so that -# it can be recovered later. 
-def dbBackup(): - if not os.path.isfile(databasePath) or (os.path.isfile(backupPath) - and datetime.fromtimestamp(os.stat(backupPath).st_mtime) > datetime.now() - timedelta(hours = 24)): - return - if os.path.isfile(backupPath): - if countLines(backupPath) < countLines(databasePath): - os.remove(backupPath) - else: - date = datetime.now().strftime("%y%m%d") - os.rename(backupPath, backupPath + "_" + date) - writeLog("Current backup file contains more entries than current live database, backup saved") - shutil.copyfile(databasePath, backupPath) - writeLog("Backup of database taken") - -# Here the whole thing is run -database = jsonRead() -posts = getPosts() -updates = post(posts) -if updates: - saveDB() - cleanup() -dbBackup() -if not posts: - writeLog("No new posts found.") +from atproto import Client +import tweepy +from mastodon import Mastodon +from datetime import datetime, timedelta +from auth import * +from paths import * +import settings +import json, os, urllib.request, random, string, shutil, re + +date_in_format = '%Y-%m-%dT%H:%M:%S' + +# Setting up connections to bluesky, twitter and mastodon + +bsky = Client() +bsky.login(bsky_handle, bsky_password) +# After changes in twitters API we need to use tweepy.Client to make posts as it uses version 2.0 of the API. +# However, uploading images is still not included in 2.0, so for that we need to use tweepy.API, which uses +# the previous version. 
+if settings.Twitter: + twitter = tweepy.Client(consumer_key=TWITTER_APP_KEY, + consumer_secret=TWITTER_APP_SECRET, + access_token=TWITTER_ACCESS_TOKEN, + access_token_secret=TWITTER_ACCESS_TOKEN_SECRET) + + tweepy_auth = tweepy.OAuth1UserHandler(TWITTER_APP_KEY, TWITTER_APP_SECRET, TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET) + twitter_images = tweepy.API(tweepy_auth) + +if settings.Mastodon: + mastodon = Mastodon( + access_token = MASTODON_TOKEN, + api_base_url = MASTODON_INSTANCE + ) + +# Getting posts from bluesky + +def getPosts(): + writeLog("Gathering posts") + posts = {} + # Getting feed of user + profile_feed = bsky.app.bsky.feed.get_author_feed({'actor': bsky_handle}) + for feed_view in profile_feed.feed: + if feed_view.post.author.handle != bsky_handle: + continue + # Post type "post" means it is not a quote post. + postType = "post" + # If post has an embed of type record it is a quote post, and should not be crossposted + cid = feed_view.post.cid + text = feed_view.post.record.text + # Sometimes bluesky shortens URLs and in that case we need to restore them before crossposting + if feed_view.post.record.facets: + text = restoreUrls(feed_view.post.record) + langs = feed_view.post.record.langs + timestamp = datetime.strptime(feed_view.post.indexed_at.split(".")[0], date_in_format) + timedelta(hours = 2) + # Setting replyToUser to the same as user handle and only changing it if the tweet is an actual reply. + # This way we can just check if the variable is the same as the user handle later and send through + # both tweets that are not replies, and posts that are part of a thread. + replyToUser = bsky_handle + replyTo = "" + # Checking if post is a quote post. Posts with references to feeds look like quote posts but aren't, and so will fail on missing attribute. 
+ # Since quote posts can give values in two different ways it's a bit of a hassle to double check if it is an actual quote post, + # so instead I just try to run the function and if it fails I skip the post + # If there is some reason you would want to crosspost a post referencing a bluesky-feed that I'm not seeing, I might update this in the future. + if feed_view.post.embed and hasattr(feed_view.post.embed, "record"): + try: + replyToUser, replyTo = getQuotePost(feed_view.post.embed.record) + postType = "quote" + except: + writeLog("Post is of a type the crossposter can't parse.") + continue + # Checking if post is regular reply + elif feed_view.post.record.reply: + postType = "reply" + replyTo = feed_view.post.record.reply.parent.cid + # Poster will try to fetch reply to-username the "ordinary" way, + # and if it fails, it will try getting the entire thread and + # finding it that way + try: + replyToUser = feed_view.reply.parent.author.handle + except: + replyToUser = getReplyToUser(feed_view.post.record.reply.parent) + # If unable to fetch user that was replied to, code will skip this post. + if not replyToUser: + writeLog("Unable to find the user that this post replies to or quotes") + continue + # Checking if post is by user (i.e. not a repost), withing timelimit and either not a reply or a reply in a thread. + if timestamp > datetime.now() - timedelta(hours = settings.postTimeLimit) and replyToUser == bsky_handle: + # Fetching images if there are any in the post + imageData = "" + images = [] + if feed_view.post.embed and hasattr(feed_view.post.embed, "images"): + imageData = feed_view.post.embed.images + elif feed_view.post.embed and hasattr(feed_view.post.embed, "media") and postType == "quote": + imageData = feed_view.post.embed.media.images + # Sometimes posts have included links that are not included in the actual text of the post. This adds adds that back. 
+ if feed_view.post.embed and hasattr(feed_view.post.embed, "external") and hasattr(feed_view.post.embed.external, "uri"): + if feed_view.post.embed.external.uri not in text: + text += '\n'+feed_view.post.embed.external.uri + if imageData: + for image in imageData: + images.append({"url": image.fullsize, "alt": image.alt}) + postInfo = { + "text": text, + "replyTo": replyTo, + "images": images, + "type": postType, + "langs": langs + } + # Saving post to posts dictionary + posts[cid] = postInfo; + return posts + +# Function for getting username of person replied to. It can mostly be retrieved from the reply section of the tweet that has been fetched, +# but in cases where the original post in a thread has been deleted it causes some weirdness. Hopefully this resolves it. +def getReplyToUser(reply): + uri = reply.uri + username = "" + try: + response = bsky.app.bsky.feed.get_post_thread(params={"uri": uri}) + username = response.thread.post.author.handle + except: + writeLog("Unable to retrieve replyTo-user.") + return username + +# Function for getting included images. If no images are included, an empty list will be returned, +# and the posting functions will know not to include any images. +def getImages(images): + localImages = [] + for image in images: + # Getting alt text for image. If there is none this will be an empty string. + alt = image["alt"] + # Giving the image just a random filename + filename = ''.join(random.choice(string.ascii_lowercase) for i in range(10)) + ".jpg" + filename = imagePath + filename + # Downloading fullsize version of image + urllib.request.urlretrieve(image["url"], filename) + # Saving image info in a dictionary and adding it to the list. 
+ imageInfo = { + "filename": filename, + "alt": alt + } + localImages.append(imageInfo) + return localImages + +# Function for restoring shortened URLS +def restoreUrls(record): + text = record.text + encodedText = text.encode("UTF-8") + for facet in record.facets: + if facet.features[0].py_type != "app.bsky.richtext.facet#link": + continue + url = facet.features[0].uri + # The index section designates where a URL starts end ends. Using this we can pick out the exact + # string representing the URL in the post, and replace it with the actual URL. + start = facet.index.byte_start + end = facet.index.byte_end + section = encodedText[start:end] + shortened = section.decode("UTF-8") + text = text.replace(shortened, url) + return text + +def getQuotePost(post): + if isinstance(post, dict): + user = post["record"]["author"]["handle"] + cid = post["record"]["cid"] + elif hasattr(post, "author"): + user = post.author.handle + cid = post.cid + else: + user = post.record.author.handle + cid = post.record.cid + return user, cid + +# Deprecated function +def imageFail(post): + if (post.embed and (hasattr(post.record.embed, "image") or hasattr(post.record.embed, "media")) + and not hasattr(post.embed, "images")): + return True + else: + return False + +def post(posts): + # The updates status is set to false until anything has been altered in the databse. If nothing has been posted in a run, we skip resaving the database. + updates = False + # Running through the posts dictionary reversed, to get oldest posts first. + for cid in reversed(list(posts.keys())): + # Checking if the post is already in the database, and in that case getting the IDs for the post + # on twitter and mastodon. If one or both of these IDs are empty, post will be sent. 
+ tweetId = "" + tootId = "" + tFail = 0 + mFail = 0 + if cid in database: + tweetId = database[cid]["ids"]["twitterId"] + tootId = database[cid]["ids"]["mastodonId"] + tFail = database[cid]["failed"]["twitter"] + mFail = database[cid]["failed"]["mastodon"] + if mFail >= settings.maxRetries: + writeLog("Error limit reached, not posting to Mastodon") + if not tootId: + updates = True + tootId = "FailedToPost" + if tFail >= settings.maxRetries: + writeLog("Error limit reached, not posting to Twitter") + if not tweetId: + updates = True + tweetId = "FailedToPost" + text = posts[cid]["text"] + replyTo = posts[cid]["replyTo"] + images = posts[cid]["images"] + postType = posts[cid]["type"] + langs = posts[cid]["langs"] + tweetReply = "" + tootReply = "" + # If it is a reply, we get the IDs of the posts we want to reply to from the database. + # If post is not found in database, we can't continue the thread on mastodon and twitter, + # and so we skip it. + if replyTo in database: + tweetReply = database[replyTo]["ids"]["twitterId"] + tootReply = database[replyTo]["ids"]["mastodonId"] + elif replyTo and replyTo not in database: + writeLog("Post was a reply to a post that is not in the database.") + continue + # If either tweet or toot has not previously been posted, we download images (given the post includes images). + if images and (not tweetId or not tootId): + images = getImages(images) + # Trying to post to twitter and mastodon. If posting fails the post ID for each service is set to an + # empty string, letting the code know it should try again next time the code is run. + if not tweetId and tweetReply != "skipped" and tweetReply != "FailedToPost": + updates = True + try: + tweetId = tweet(text, tweetReply, images, postType, langToggle(langs, "twitter")) + except Exception as error: + writeLog(error) + tFail += 1 + tweetId = "" + else: + writeLog("Not posting to Twitter") + # Mastodon does not have a quote retweet function, so those will just be sent as replies. 
# Decides whether a post should be crossposted to a service, using the language tags of the
# post as a switch.
#   langs:   the language tags of the post (may be empty or None).
#   service: "twitter" or "mastodon"; any other value is logged and the program exits.
# Returns True if the post should be sent to the service, otherwise False.
def langToggle(langs, service):
    if service == "twitter":
        toggleLang = settings.twitterLang
    elif service == "mastodon":
        toggleLang = settings.mastodonLang
    else:
        writeLog("Something has gone very wrong")
        exit()
    # If no toggle language is configured for the service, everything is posted.
    if not toggleLang:
        return True
    # Posts tagged with the toggle language get the opposite of the default behaviour.
    if langs and toggleLang in langs:
        return not settings.postDefault
    return settings.postDefault


# Function for posting tweets.
#   post:     the text of the post.
#   replyTo:  ID of the tweet to reply to or quote, or an empty string.
#   images:   list of dicts with the keys "filename" and "alt", or an empty value.
#   postType: "quote" and "reply" are the values that are treated specially here.
#   doPost:   result of langToggle, False means the post should not be sent.
# Returns the ID of the last tweet that was posted, or "skipped" if twitter is disabled,
# doPost is False or the post was too long and could not be split.
# Errors from the twitter API are not caught here, the caller handles them.
def tweet(post, replyTo, images, postType, doPost):
    if not settings.Twitter or not doPost:
        return "skipped"
    mediaIds = []
    # If post includes images, images are uploaded so that they can be included in the tweet
    if images:
        for image in images:
            filename = image["filename"]
            alt = image["alt"]
            # Alt texts longer than 1000 characters are cut down to 996 characters plus "...".
            if len(alt) > 1000:
                alt = alt[:996] + "..."
            mediaId = twitter_images.media_upload(filename).media_id
            # If alt text was added to the image on bluesky, it's also added to the image on twitter.
            if alt:
                writeLog("Uploading image " + filename + " with alt: " + alt + " to twitter")
                twitter_images.create_media_metadata(mediaId, alt)
            mediaIds.append(mediaId)
    # Checking if the post is longer than 280 characters, and if so sending to the
    # splitPost-function.
    partTwo = ""
    if postLength(post) > 280:
        post, partTwo = splitPost(post)
        # If the function does not return a post, splitting failed and we will skip this post.
        if not post:
            return "skipped"
    # If post is both reply and has images it is posted as both a reply and with images, if it's
    # a quote with images it's posted as that. If just either of the three it is posted as just that,
    # and if neither it is just posted as a text post.
    if replyTo and mediaIds and postType == "quote":
        response = twitter.create_tweet(text=post, quote_tweet_id=replyTo, media_ids=mediaIds)
    elif replyTo and mediaIds and postType == "reply":
        response = twitter.create_tweet(text=post, in_reply_to_tweet_id=replyTo, media_ids=mediaIds)
    elif postType == "quote":
        response = twitter.create_tweet(text=post, quote_tweet_id=replyTo)
    elif replyTo:
        response = twitter.create_tweet(text=post, in_reply_to_tweet_id=replyTo)
    elif mediaIds:
        response = twitter.create_tweet(text=post, media_ids=mediaIds)
    else:
        response = twitter.create_tweet(text=post)
    writeLog("Posted to twitter")
    tweetId = response[0]["id"]
    # The second half of a split post is sent as a reply to the first half, and its ID is
    # returned so that a thread continues from the last tweet.
    if partTwo:
        response = twitter.create_tweet(text=partTwo, in_reply_to_tweet_id=tweetId)
        tweetId = response[0]["id"]
    return tweetId


# More or less the exact same function as for tweeting, but for tooting.
#   post:    the text of the post.
#   replyTo: ID of the toot to reply to, or an empty string.
#   images:  list of dicts with the keys "filename" and "alt", or an empty value.
#   doPost:  result of langToggle, False means the post should not be sent.
# Returns the ID of the toot, or "skipped" if mastodon is disabled or doPost is False.
# Errors from the mastodon API are not caught here, the caller handles them.
def toot(post, replyTo, images, doPost):
    if not settings.Mastodon or not doPost:
        return "skipped"
    mediaIds = []
    # If post includes images, images are uploaded so that they can be included in the toot
    if images:
        for image in images:
            filename = image["filename"]
            alt = image["alt"]
            # If alt text was added to the image on bluesky, it's also added to the image on mastodon,
            # otherwise it will be uploaded without alt text.
            if alt:
                writeLog("Uploading image " + filename + " with alt: " + alt + " to mastodon")
                res = mastodon.media_post(filename, description=alt)
            else:
                writeLog("Uploading image " + filename)
                res = mastodon.media_post(filename)
            mediaIds.append(res.id)
    # Visibility is set to whatever is set in the settings file. If that is hybrid, it sets the
    # visibility either to public or unlisted depending on if it is a reply in a thread or not.
    visibility = settings.mastodonVisibility
    if visibility == "hybrid" and replyTo:
        visibility = "unlisted"
    elif visibility == "hybrid":
        visibility = "public"
    # If post is both reply and has images it is posted as both a reply and with images.
    # If just either of the two it is posted with just that, and if neither it is just posted as a text post.
    if replyTo and mediaIds:
        response = mastodon.status_post(post, in_reply_to_id=replyTo, media_ids=mediaIds, visibility=visibility)
    elif replyTo:
        response = mastodon.status_post(post, in_reply_to_id=replyTo, visibility=visibility)
    elif mediaIds:
        response = mastodon.status_post(post, media_ids=mediaIds, visibility=visibility)
    else:
        response = mastodon.status_post(post, visibility=visibility)
    writeLog("Posted to mastodon")
    return response["id"]


# Function for correctly counting post length.
# Returns the number of characters in the post, with every URL that is longer than
# 23 characters counted as 23 characters.
def postLength(post):
    # Twitter shortens urls to 23 characters
    shortUrlLength = 23
    length = len(post)
    # Finding all urls and calculating how much shorter the post will be after shortening
    regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
    # The pattern has several groups, so findall returns tuples. The first item is the whole url.
    for url in re.findall(regex, post):
        urlLength = len(url[0])
        if urlLength > shortUrlLength:
            length -= urlLength - shortUrlLength
    return length


# Function for splitting up posts that are too long for twitter.
# Returns a tuple with the first and the second part of the post. If the post could not be
# split into two parts of at most 280 characters each, both parts are empty strings.
def splitPost(text):
    writeLog("Splitting post that is too long for twitter.")
    first = text
    # Starting with an empty second part means the final length check can never hit an unset variable.
    second = ""
    # We first try to split the post into sentences, and send as many as can fit in the first one,
    # and the rest in the second. The sentences are joined with ". ", the same separator they were
    # split on, so that the space between sentences is kept.
    sentences = text.split(". ")
    i = 1
    while len(first) > 280 and i < len(sentences):
        first = ". ".join(sentences[:-i]) + "."
        second = ". ".join(sentences[-i:])
        i += 1
    # If splitting by sentence does not result in a short enough post, we try splitting by words instead.
    if len(first) > 280:
        first = text
        words = text.split(" ")
        i = 1
        while len(first) > 280 and i < len(words):
            first = " ".join(words[:-i])
            second = " ".join(words[-i:])
            i += 1
    # If splitting has ended up with either a first or second part that is too long, we return empty
    # strings and the post is not sent to twitter.
    if len(first) > 280 or len(second) > 280:
        writeLog("Was not able to split post.")
        first = ""
        second = ""
    return first, second


# Function for writing new lines to the database.
#   skeet:  ID of the bluesky post, used as key in the database.
#   tweet:  ID stored for twitter (may be empty, "skipped" or "FailedToPost").
#   toot:   ID stored for mastodon (same kind of values as for twitter).
#   failed: dict with the number of failed attempts, keys "twitter" and "mastodon".
def jsonWrite(skeet, tweet, toot, failed):
    ids = {
        "twitterId": tweet,
        "mastodonId": toot
    }
    # When running, the code saves the database to memory, so instead of just saving the post to the
    # database file, we also save it to the open database. This also overwrites the version of the
    # post in memory in case an ID was missing because of a previous failure.
    database[skeet] = {
        "ids": ids,
        "failed": failed
    }
    jsonString = json.dumps({
        "skeet": skeet,
        "ids": ids,
        "failed": failed
    })
    # Skipping adding posts to db file if they are already in it.
    if not isInDB(jsonString):
        writeLog("Adding to database: " + jsonString)
        # Mode "a" appends to the file if it exists and creates it if it does not.
        with open(databasePath, "a") as file:
            file.write(jsonString + "\n")


# Function for reading database file and saving values in a dictionary.
# Returns a dict with post IDs as keys and {"ids": ..., "failed": ...} as values. If the database
# file does not exist an empty dict is returned.
def jsonRead():
    entries = {}
    if not os.path.exists(databasePath):
        return entries
    with open(databasePath, 'r') as file:
        for line in file:
            # Lines that are not valid JSON (for example empty lines) are skipped.
            try:
                jsonLine = json.loads(line)
            except ValueError:
                continue
            # Rows without failure counts get zero failures for both services.
            failed = {"twitter": 0, "mastodon": 0}
            if "failed" in jsonLine:
                failed = jsonLine["failed"]
            entries[jsonLine["skeet"]] = {
                "ids": jsonLine["ids"],
                "failed": failed
            }
    return entries


# Function for checking if a line is already in the database-file.
# Returns True if the text of the line is found anywhere in the file, otherwise False.
def isInDB(line):
    if not os.path.exists(databasePath):
        return False
    with open(databasePath, 'r') as file:
        return line in file.read()


# Function for writing to the log file.
# The message is always printed, and if logging is turned on in the settings it is also
# appended to the log file of the current day.
def writeLog(message):
    # The time is only fetched once, so that the timestamp and the file name always agree.
    current = datetime.now()
    now = current.strftime("%d/%m/%Y %H:%M:%S")
    date = current.strftime("%y%m%d")
    message = str(now) + ": " + str(message) + "\n"
    print(message)
    if not settings.Logging:
        return
    log = logPath + date + ".log"
    # Mode "a" appends to the file if it exists and creates it if it does not.
    with open(log, "a") as dst:
        dst.write(message)


# Cleaning up downloaded images.
# Everything in the image folder is deleted. If something can't be deleted it is logged
# and the cleanup continues with the next item.
def cleanup():
    writeLog("Deleting local images")
    for filename in os.listdir(imagePath):
        file_path = os.path.join(imagePath, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            writeLog('Failed to delete %s. Reason: %s' % (file_path, e))


# Since we are working with a version of the database in memory, at the end of the run
# we completely overwrite the database on file with the one in memory.
# This does kind of make it unnecessary to write each new post to the file while running,
# but in case the program fails halfway through it gives us kind of a backup.
def saveDB():
    writeLog("Saving new database")
    # If there is nothing in memory the file is left as it is.
    if not database:
        return
    # The file is opened once and all rows are written to it, instead of opening it for every row.
    with open(databasePath, "w") as file:
        for skeet in database:
            row = {
                "skeet": skeet,
                "ids": database[skeet]["ids"],
                "failed": database[skeet]["failed"]
            }
            file.write(json.dumps(row) + "\n")


# Function for counting lines in a file.
# Returns the number of lines, which is 0 for an empty file.
def countLines(file):
    with open(file, 'r') as handle:
        return sum(1 for _ in handle)


# Every 24 hours a backup of the database is saved, in case something happens to the live database.
# If the live database contains fewer lines than the backup it means something has probably gone wrong,
# and before the live database is saved as a backup, the current backup is saved as a new file, so that
# it can be recovered later.
def dbBackup():
    # Nothing to do if there is no database, or if the current backup is less than 24 hours old.
    if not os.path.isfile(databasePath):
        return
    if (os.path.isfile(backupPath)
            and datetime.fromtimestamp(os.stat(backupPath).st_mtime) > datetime.now() - timedelta(hours=24)):
        return
    if os.path.isfile(backupPath):
        # A backup with the same number of lines as the live database is normal, so only a backup
        # with more lines than the live database is kept.
        if countLines(backupPath) <= countLines(databasePath):
            os.remove(backupPath)
        else:
            date = datetime.now().strftime("%y%m%d")
            os.rename(backupPath, backupPath + "_" + date)
            writeLog("Current backup file contains more entries than current live database, backup saved")
    shutil.copyfile(databasePath, backupPath)
    writeLog("Backup of database taken")


# Here the whole thing is run.
# It is only run when the file is started as a script, so that the functions above can be
# imported without starting a crosspost run.
if __name__ == "__main__":
    database = jsonRead()
    posts = getPosts()
    updates = post(posts)
    if updates:
        saveDB()
        cleanup()
    dbBackup()
    if not posts:
        writeLog("No new posts found.")