From 008dd78f14bbe1d2b9d0010f016b2e61d14ce6a0 Mon Sep 17 00:00:00 2001
From: linus
Date: Wed, 4 Oct 2023 13:21:39 +0200
Subject: [PATCH] added improved post length calculation

---
# NOTE(review): this patch text was received with its line breaks and
# indentation collapsed. The layout below is reconstructed from the hunk
# headers, and 4-space Python indentation is assumed -- verify whitespace
# against crosspost.py before applying.
#
# What the patch does, as far as the hunks show:
#   * adds `re` to the combined import line;
#   * adds postLength(post), which returns len(post) minus, for every URL
#     match longer than 23 characters, the excess over 23;
#   * in the hunk under `def tweet(...)`, replaces the `len(post) > 280`
#     check with `postLength(post) > 280` before calling splitPost;
#   * in splitPost, splits sentences on ". " instead of ".".
#
# NOTE(review): postLength leaves URLs shorter than 23 characters at their
# raw length. Twitter's character-counting documentation appears to count
# every URL as 23 regardless of its length -- confirm whether short URLs
# should be counted up to 23 as well.
# NOTE(review): the splitPost loop visible in the last hunk still compares
# len(first) with 280 rather than postLength(first) -- confirm intended.
# NOTE(review): sentences are now split on ". " but the unchanged context
# line rejoins them with "."; the space after each full stop looks like it
# is dropped from the rejoined text -- verify against the rest of splitPost.
# NOTE(review): the URL pattern contains capture groups, so re.findall
# yields tuples; url[0] is the outermost group, i.e. the full URL text.
 crosspost.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/crosspost.py b/crosspost.py
index 141f434..3e92316 100644
--- a/crosspost.py
+++ b/crosspost.py
@@ -5,7 +5,7 @@ from datetime import datetime, timedelta
 from auth import *
 from paths import *
 import settings
-import json, os, urllib.request, random, string, shutil
+import json, os, urllib.request, random, string, shutil, re
 
 date_in_format = '%Y-%m-%dT%H:%M:%S'
 
@@ -285,7 +285,7 @@ def tweet(post, replyTo, images, postType, doPost):
     # Checking if the post is longer than 280 characters, and if so sending to the
     # splitPost-function.
     partTwo = ""
-    if len(post) > 280:
+    if postLength(post) > 280:
         post, partTwo = splitPost(post)
         # If the function does not return a post, splitting failed and we will skip this post.
         if not post:
@@ -347,13 +347,27 @@ def toot(post, replyTo, images, doPost):
     id = a["id"]
     return id
 
+# Function for correctly counting post length
+def postLength(post):
+    # Twitter shortens urls to 23 characters
+    shortUrlLength = 23
+    length = len(post)
+    # Finding all urls and calculating how much shorter the post will be after shortening
+    regex = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"
+    urls = re.findall(regex, post)
+    for url in urls:
+        urlLength = len(url[0])
+        if urlLength > shortUrlLength:
+            length = length - (urlLength - shortUrlLength)
+    return length
+
 # Function for splitting up posts that are too long for twitter.
 def splitPost(text):
     writeLog("Splitting post that is too long for twitter.")
     first = text
     # We first try to split the post into sentences, and send as many as can fit in the first one,
     # and the rest in the second.
-    sentences = text.split(".")
+    sentences = text.split(". ")
     i = 1
     while len(first) > 280 and i < len(sentences):
         first = ".".join(sentences[:(len(sentences) - i)]) + "."