using System.Diagnostics.CodeAnalysis;
-using System.Globalization;
+using System.Text.RegularExpressions;
using Content.Shared.CCVar;
using Robust.Shared.Configuration;
namespace Content.Server.Chat.Managers;
+/// <summary>
+/// Sanitizes messages!
+/// It currently only removes the shorthands for emotes (like "lol" or "^-^") from a chat message and returns the
+/// emote for the last shorthand found in that message.
+/// </summary>
public sealed class ChatSanitizationManager : IChatSanitizationManager
{
- [Dependency] private readonly IConfigurationManager _configurationManager = default!;
-
- private static readonly Dictionary<string, string> SmileyToEmote = new()
+ /// <summary>
+ /// Maps a chat shorthand (matched case-insensitively) to the localization key of the emote that replaces it.
+ /// </summary>
+ private static readonly Dictionary<string, string> ShorthandToEmote = new()
{
- // I could've done this with regex, but felt it wasn't the right idea.
{ ":)", "chatsan-smiles" },
{ ":]", "chatsan-smiles" },
{ "=)", "chatsan-smiles" },
{ "kek", "chatsan-laughs" },
{ "rofl", "chatsan-laughs" },
{ "o7", "chatsan-salutes" },
- { ";_;7", "chatsan-tearfully-salutes"},
+ { ";_;7", "chatsan-tearfully-salutes" },
{ "idk", "chatsan-shrugs" },
{ ";)", "chatsan-winks" },
{ ";]", "chatsan-winks" },
{ "(':", "chatsan-tearfully-smiles" },
{ "[':", "chatsan-tearfully-smiles" },
{ "('=", "chatsan-tearfully-smiles" },
- { "['=", "chatsan-tearfully-smiles" },
+ { "['=", "chatsan-tearfully-smiles" }
};
+
+ /// <summary>
+ /// One matcher per entry of <see cref="ShorthandToEmote"/>, paired with that entry's emote key.
+ /// Built once: the patterns depend only on the static shorthand table, so there is no reason to
+ /// re-parse them for every chat message. Must be declared after <see cref="ShorthandToEmote"/>
+ /// because static field initializers run in textual order.
+ /// </summary>
+ private static readonly (Regex Matcher, string EmoteKey)[] ShorthandMatchers = BuildShorthandMatchers();
+
[Dependency] private readonly IConfigurationManager _configurationManager = default!;
+ [Dependency] private readonly ILocalizationManager _loc = default!;
+
private bool _doSanitize;
+ /// <summary>
+ /// Keeps <see cref="_doSanitize"/> in sync with the <c>CCVars.ChatSanitizerEnabled</c> configuration value.
+ /// </summary>
public void Initialize()
_configurationManager.OnValueChanged(CCVars.ChatSanitizerEnabled, x => _doSanitize = x, true);
}
- public bool TrySanitizeOutSmilies(string input, EntityUid speaker, out string sanitized, [NotNullWhen(true)] out string? emote)
+ /// <summary>
+ /// Remove the shorthands from the message, returning the last one found as the emote.
+ /// </summary>
+ /// <remarks>
+ /// When sanitization is disabled the message is returned untouched and no emote is reported.
+ /// Otherwise every recognised shorthand is stripped and the result is trimmed; the emote returned
+ /// belongs to the shorthand whose right-most occurrence sits furthest to the right in the input.
+ /// </remarks>
+ /// <param name="message">The pre-sanitized message</param>
+ /// <param name="speaker">The speaker, passed to the localized emote as the "ent" argument</param>
+ /// <param name="sanitized">The sanitized message with shorthands removed</param>
+ /// <param name="emote">The localized emote, or null when no shorthand was found</param>
+ /// <returns>True if emote has been sanitized out</returns>
+ public bool TrySanitizeEmoteShorthands(string message,
+ EntityUid speaker,
+ out string sanitized,
+ [NotNullWhen(true)] out string? emote)
{
+ emote = null;
+ sanitized = message;
+
if (!_doSanitize)
- {
- sanitized = input;
- emote = null;
return false;
- }
- input = input.TrimEnd();
+ // Index (in the untouched input) of the right-most shorthand seen so far; -1 means none found yet.
+ var lastEmoteIndex = -1;
- foreach (var (smiley, replacement) in SmileyToEmote)
+ foreach (var (matcher, emoteKey) in ShorthandMatchers)
{
- if (input.EndsWith(smiley, true, CultureInfo.InvariantCulture))
+ // Search `sanitized`, which still holds the untouched input at this point, rather than `message`,
+ // which shrinks as shorthands are stripped. That keeps the indices of different shorthands comparable.
+ // The matcher is right-to-left, so the first match is the last occurrence.
+ var lastMatch = matcher.Match(sanitized);
+
+ if (!lastMatch.Success)
+ continue;
+
+ if (lastMatch.Index > lastEmoteIndex)
{
- sanitized = input.Remove(input.Length - smiley.Length).TrimEnd();
- emote = Loc.GetString(replacement, ("ent", speaker));
- return true;
+ lastEmoteIndex = lastMatch.Index;
+ emote = _loc.GetString(emoteKey, ("ent", speaker));
}
+
+ message = matcher.Replace(message, string.Empty);
}
- sanitized = input;
- emote = null;
- return false;
+ sanitized = message.Trim();
+ return emote is not null;
}
+
+ /// <summary>
+ /// Builds the matcher table for <see cref="ShorthandMatchers"/> from <see cref="ShorthandToEmote"/>,
+ /// keeping the dictionary's enumeration order.
+ /// </summary>
+ private static (Regex Matcher, string EmoteKey)[] BuildShorthandMatchers()
+ {
+ // RightToLeft: the first match returned is the last occurrence in the message.
+ // CultureInvariant: case-insensitive matching must not depend on the server's current culture
+ // (e.g. Turkish casing rules would stop "IDK" from matching "idk").
+ const RegexOptions options =
+ RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;
+
+ var matchers = new (Regex Matcher, string EmoteKey)[ShorthandToEmote.Count];
+ var next = 0;
+
+ foreach (var (shorthand, emoteKey) in ShorthandToEmote)
+ {
+ // We have to escape it because shorthands like ":)" or "-_-" would break the regex otherwise.
+ var escaped = Regex.Escape(shorthand);
+
+ // So there are 2 cases:
+ // - If there is whitespace before it and after it is either punctuation, whitespace, or the end of the line
+ // Delete the word and the whitespace before
+ // - If it is at the start of the string and is followed by punctuation, whitespace, or the end of the line
+ // Delete the word and the punctuation if it exists.
+ var pattern =
+ $@"\s{escaped}(?=\p{{P}}|\s|$)|^{escaped}(?:\p{{P}}|(?=\s|$))";
+
+ matchers[next++] = (new Regex(pattern, options), emoteKey);
+ }
+
+ return matchers;
+ }
}