]> git.smokeofanarchy.ru Git - space-station-14.git/commitdiff
Sanitize shorthand emotes throughout the whole message (#28645)
author Thomas <87614336+Aeshus@users.noreply.github.com>
Thu, 17 Oct 2024 14:01:32 +0000 (09:01 -0500)
committer GitHub <noreply@github.com>
Thu, 17 Oct 2024 14:01:32 +0000 (16:01 +0200)
* Rename ChatSanitizationManager to ChatEmoteSanitizationManager

The prior name was kind of confusing as there's an emote one and
now there's also chat expansion happening in the accent system,
so knowing which one I actually need to edit is useful.

So, I just need to keep myself not confused.

* Rename smileyToEmote and remove punctuation duplicates

The name SmileyToEmote is just... Bad.

Plus, I needed to remove the punctuation duplicates as that would
break any kind of regex parsing that I tried.

* Switch to regex from checking end of string

I also changed from System.Globalization to ILocalizationManager.

Writing that regex was definitely an experience.

* Document regex and the manager

* Rename it back

* Simplify regex

Content.Server/Chat/Managers/ChatSanitizationManager.cs
Content.Server/Chat/Managers/IChatSanitizationManager.cs
Content.Server/Chat/Systems/ChatSystem.cs

index 634d8cdefabf43c1f6f33056264f4406f5b81b60..b0d28eae75ca2898c490f64b0ad2f3259bbc207e 100644 (file)
@@ -1,17 +1,19 @@
 using System.Diagnostics.CodeAnalysis;
-using System.Globalization;
+using System.Text.RegularExpressions;
 using Content.Shared.CCVar;
 using Robust.Shared.Configuration;
 
 namespace Content.Server.Chat.Managers;
 
+/// <summary>
+///     Sanitizes messages!
+///     It currently only removes the shorthands for emotes (like "lol" or "^-^") from a chat message and returns the last
+///     emote in their message
+/// </summary>
 public sealed class ChatSanitizationManager : IChatSanitizationManager
 {
-    [Dependency] private readonly IConfigurationManager _configurationManager = default!;
-
-    private static readonly Dictionary<string, string> SmileyToEmote = new()
+    private static readonly Dictionary<string, string> ShorthandToEmote = new()
     {
-        // I could've done this with regex, but felt it wasn't the right idea.
         { ":)", "chatsan-smiles" },
         { ":]", "chatsan-smiles" },
         { "=)", "chatsan-smiles" },
@@ -75,7 +77,7 @@ public sealed class ChatSanitizationManager : IChatSanitizationManager
         { "kek", "chatsan-laughs" },
         { "rofl", "chatsan-laughs" },
         { "o7", "chatsan-salutes" },
-        { ";_;7", "chatsan-tearfully-salutes"},
+        { ";_;7", "chatsan-tearfully-salutes" },
         { "idk", "chatsan-shrugs" },
         { ";)", "chatsan-winks" },
         { ";]", "chatsan-winks" },
@@ -88,9 +90,12 @@ public sealed class ChatSanitizationManager : IChatSanitizationManager
         { "(':", "chatsan-tearfully-smiles" },
         { "[':", "chatsan-tearfully-smiles" },
         { "('=", "chatsan-tearfully-smiles" },
-        { "['=", "chatsan-tearfully-smiles" },
+        { "['=", "chatsan-tearfully-smiles" }
     };
 
+    [Dependency] private readonly IConfigurationManager _configurationManager = default!;
+    [Dependency] private readonly ILocalizationManager _loc = default!;
+
     private bool _doSanitize;
 
     public void Initialize()
@@ -98,29 +103,60 @@ public sealed class ChatSanitizationManager : IChatSanitizationManager
         _configurationManager.OnValueChanged(CCVars.ChatSanitizerEnabled, x => _doSanitize = x, true);
     }
 
-    public bool TrySanitizeOutSmilies(string input, EntityUid speaker, out string sanitized, [NotNullWhen(true)] out string? emote)
+    /// <summary>
+    ///     Remove the shorthands from the message, returning the last one found as the emote
+    /// </summary>
+    /// <param name="message">The pre-sanitized message</param>
+    /// <param name="speaker">The speaker</param>
+    /// <param name="sanitized">The sanitized message with shorthands removed</param>
+    /// <param name="emote">The localized emote</param>
+    /// <returns>True if emote has been sanitized out</returns>
+    public bool TrySanitizeEmoteShorthands(string message,
+        EntityUid speaker,
+        out string sanitized,
+        [NotNullWhen(true)] out string? emote)
     {
+        emote = null;
+        sanitized = message;
+
         if (!_doSanitize)
-        {
-            sanitized = input;
-            emote = null;
             return false;
-        }
 
-        input = input.TrimEnd();
+        // -1 is just a canary for nothing found yet
+        var lastEmoteIndex = -1;
 
-        foreach (var (smiley, replacement) in SmileyToEmote)
+        foreach (var (shorthand, emoteKey) in ShorthandToEmote)
         {
-            if (input.EndsWith(smiley, true, CultureInfo.InvariantCulture))
+            // We have to escape it because shorthands like ":)" or "-_-" would break the regex otherwise.
+            var escaped = Regex.Escape(shorthand);
+
+            // So there are 2 cases:
+            // - If there is whitespace before it and after it is either punctuation, whitespace, or the end of the line
+            //   Delete the word and the whitespace before
+            // - If it is at the start of the string and is followed by punctuation, whitespace, or the end of the line
+            //   Delete the word and the punctuation if it exists.
+            var pattern =
+                $@"\s{escaped}(?=\p{{P}}|\s|$)|^{escaped}(?:\p{{P}}|(?=\s|$))";
+
+            var r = new Regex(pattern, RegexOptions.RightToLeft | RegexOptions.IgnoreCase);
+
+            // We're using sanitized as the original message until the end so that we can make sure the indices of
+            // the emotes are accurate.
+            var lastMatch = r.Match(sanitized);
+
+            if (!lastMatch.Success)
+                continue;
+
+            if (lastMatch.Index > lastEmoteIndex)
             {
-                sanitized = input.Remove(input.Length - smiley.Length).TrimEnd();
-                emote = Loc.GetString(replacement, ("ent", speaker));
-                return true;
+                lastEmoteIndex = lastMatch.Index;
+                emote = _loc.GetString(emoteKey, ("ent", speaker));
             }
+
+            message = r.Replace(message, string.Empty);
         }
 
-        sanitized = input;
-        emote = null;
-        return false;
+        sanitized = message.Trim();
+        return emote is not null;
     }
 }
index c067cf02ee78ab607c1e1dd41add5120d7232fae..ac85d4b4a7a5f5124efc7a57f5bc7bfc59fac0dc 100644 (file)
@@ -6,5 +6,8 @@ public interface IChatSanitizationManager
 {
     public void Initialize();
 
-    public bool TrySanitizeOutSmilies(string input, EntityUid speaker, out string sanitized, [NotNullWhen(true)] out string? emote);
+    public bool TrySanitizeEmoteShorthands(string input,
+        EntityUid speaker,
+        out string sanitized,
+        [NotNullWhen(true)] out string? emote);
 }
index 624c18130b0c2ace19ded31cf4f8783a9d86d35a..41646bee2e46b7f4b01613714a6d6a69dedbdb8a 100644 (file)
@@ -749,6 +749,9 @@ public sealed partial class ChatSystem : SharedChatSystem
         var newMessage = message.Trim();
         newMessage = SanitizeMessageReplaceWords(newMessage);
 
+        // Sanitize it first as it might change the word order
+        _sanitizer.TrySanitizeEmoteShorthands(newMessage, source, out newMessage, out emoteStr);
+
         if (capitalize)
             newMessage = SanitizeMessageCapital(newMessage);
         if (capitalizeTheWordI)
@@ -756,8 +759,6 @@ public sealed partial class ChatSystem : SharedChatSystem
         if (punctuate)
             newMessage = SanitizeMessagePeriod(newMessage);
 
-        _sanitizer.TrySanitizeOutSmilies(newMessage, source, out newMessage, out emoteStr);
-
         return newMessage;
     }