feat: refactor external input handling code

- refactor import/export code
- make import/export use the same JSON parsing as API
- make Patch.AssertIsValid actually useful
This commit is contained in:
spiral
2021-08-25 21:43:31 -04:00
parent f912805ecc
commit 4b944e2b20
18 changed files with 619 additions and 694 deletions

View File

@@ -1,204 +0,0 @@
#nullable enable
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Data;
using System.Linq;
using System.Threading.Tasks;
using Dapper;
using NodaTime;
using NpgsqlTypes;
namespace PluralKit.Core
{
public class BulkImporter: IAsyncDisposable
{
private readonly SystemId _systemId;
private readonly IPKConnection _conn;
private readonly IPKTransaction _tx;
private readonly Dictionary<string, MemberId> _knownMembers = new Dictionary<string, MemberId>();
private readonly Dictionary<string, PKMember> _existingMembersByHid = new Dictionary<string, PKMember>();
private readonly Dictionary<string, PKMember> _existingMembersByName = new Dictionary<string, PKMember>();
private BulkImporter(SystemId systemId, IPKConnection conn, IPKTransaction tx)
{
_systemId = systemId;
_conn = conn;
_tx = tx;
}
public static async Task<BulkImporter> Begin(PKSystem system, IPKConnection conn)
{
var tx = await conn.BeginTransactionAsync();
var importer = new BulkImporter(system.Id, conn, tx);
await importer.Begin();
return importer;
}
public async Task Begin()
{
// Fetch all members in the system and log their names and hids
var members = await _conn.QueryAsync<PKMember>("select id, hid, name from members where system = @System",
new {System = _systemId});
foreach (var m in members)
{
_existingMembersByHid[m.Hid] = m;
_existingMembersByName[m.Name] = m;
}
}
/// <summary>
/// Checks whether trying to add a member with the given hid and name would result in creating a new member (as opposed to just updating one).
/// </summary>
public bool IsNewMember(string hid, string name) => FindExistingMemberInSystem(hid, name) == null;
/// <summary>
/// Imports a member into the database
/// </summary>
/// <remarks>If an existing member exists in this system that matches this member in either HID or name, it'll overlay the member information on top of this instead.</remarks>
/// <param name="identifier">An opaque identifier string that refers to this member regardless of source. Is used when importing switches. Value is irrelevant, but should be consistent with the same member later.</param>
/// <param name="potentialHid">When trying to match the member to an existing member, will use a member with this HID if present in system.</param>
/// <param name="potentialName">When trying to match the member to an existing member, will use a member with this name if present in system.</param>
/// <param name="patch">A member patch struct containing the data to apply to this member </param>
/// <returns>The inserted member object, which may or may not share an ID or HID with the input member.</returns>
public async Task<PKMember> AddMember(string identifier, string potentialHid, string potentialName, MemberPatch patch)
{
// See if we can find a member that matches this one
// if not, roll a new hid and we'll insert one with that
// (we can't trust the hid given in the member, it might let us overwrite another system's members)
var existingMember = FindExistingMemberInSystem(potentialHid, potentialName);
string newHid = existingMember?.Hid ?? await _conn.QuerySingleAsync<string>("find_free_member_hid", commandType: CommandType.StoredProcedure);
// Upsert member data and return the ID
QueryBuilder qb = QueryBuilder.Upsert("members", "hid")
.Constant("hid", "@Hid")
.Constant("system", "@System");
if (patch.Name.IsPresent) qb.Variable("name", "@Name");
if (patch.DisplayName.IsPresent) qb.Variable("display_name", "@DisplayName");
if (patch.Description.IsPresent) qb.Variable("description", "@Description");
if (patch.Pronouns.IsPresent) qb.Variable("pronouns", "@Pronouns");
if (patch.Color.IsPresent) qb.Variable("color", "@Color");
if (patch.AvatarUrl.IsPresent) qb.Variable("avatar_url", "@AvatarUrl");
if (patch.ProxyTags.IsPresent) qb.Variable("proxy_tags", "@ProxyTags");
if (patch.Birthday.IsPresent) qb.Variable("birthday", "@Birthday");
if (patch.KeepProxy.IsPresent) qb.Variable("keep_proxy", "@KeepProxy");
// don't overwrite message count on existing members
if (existingMember == null)
if (patch.MessageCount.IsPresent) qb.Variable("message_count", "@MessageCount");
var newMember = await _conn.QueryFirstAsync<PKMember>(qb.Build("returning *"),
new
{
Hid = newHid,
System = _systemId,
Name = patch.Name.Value,
DisplayName = patch.DisplayName.Value,
Description = patch.Description.Value,
Pronouns = patch.Pronouns.Value,
Color = patch.Color.Value,
AvatarUrl = patch.AvatarUrl.Value?.TryGetCleanCdnUrl(),
KeepProxy = patch.KeepProxy.Value,
ProxyTags = patch.ProxyTags.Value,
Birthday = patch.Birthday.Value,
MessageCount = patch.MessageCount.Value,
});
// Log this member ID by the given identifier
_knownMembers[identifier] = newMember.Id;
return newMember;
}
private PKMember? FindExistingMemberInSystem(string hid, string name)
{
if (_existingMembersByHid.TryGetValue(hid, out var byHid)) return byHid;
if (_existingMembersByName.TryGetValue(name, out var byName)) return byName;
return null;
}
/// <summary>
/// Register switches in bulk.
/// </summary>
/// <remarks>This function assumes there are no duplicate switches (ie. switches with the same timestamp).</remarks>
public async Task AddSwitches(IReadOnlyCollection<SwitchInfo> switches)
{
// Ensure we're aware of all the members we're trying to import from
if (!switches.All(sw => sw.MemberIdentifiers.All(m => _knownMembers.ContainsKey(m))))
throw new ArgumentException("One or more switch members haven't been added using this importer");
// Fetch the existing switches in the database so we can avoid duplicates
var existingSwitches = (await _conn.QueryAsync<PKSwitch>("select * from switches where system = @System", new {System = _systemId})).ToList();
var existingTimestamps = existingSwitches.Select(sw => sw.Timestamp).ToImmutableHashSet();
var lastSwitchId = existingSwitches.Count != 0 ? existingSwitches.Select(sw => sw.Id).Max() : (SwitchId?) null;
// Import switch definitions
var importedSwitches = new Dictionary<Instant, SwitchInfo>();
await using (var importer = _conn.BeginBinaryImport("copy switches (system, timestamp) from stdin (format binary)"))
{
foreach (var sw in switches)
{
// Don't import duplicate switches
if (existingTimestamps.Contains(sw.Timestamp)) continue;
// Otherwise, write to importer
await importer.StartRowAsync();
await importer.WriteAsync(_systemId.Value, NpgsqlDbType.Integer);
await importer.WriteAsync(sw.Timestamp, NpgsqlDbType.Timestamp);
// Note that we've imported a switch with this timestamp
importedSwitches[sw.Timestamp] = sw;
}
// Commit the import
await importer.CompleteAsync();
}
// Now, fetch all the switches we just added (so, now we get their IDs too)
// IDs are sequential, so any ID in this system, with a switch ID > the last max, will be one we just added
var justAddedSwitches = await _conn.QueryAsync<PKSwitch>(
"select * from switches where system = @System and id > @LastSwitchId",
new {System = _systemId, LastSwitchId = lastSwitchId?.Value ?? -1});
// Lastly, import the switch members
await using (var importer = _conn.BeginBinaryImport("copy switch_members (switch, member) from stdin (format binary)"))
{
foreach (var justAddedSwitch in justAddedSwitches)
{
if (!importedSwitches.TryGetValue(justAddedSwitch.Timestamp, out var switchInfo))
throw new Exception($"Found 'just-added' switch (by ID) with timestamp {justAddedSwitch.Timestamp}, but this did not correspond to a timestamp we just added a switch entry of! :/");
// We still assume timestamps are unique and non-duplicate, so:
var members = switchInfo.MemberIdentifiers;
foreach (var memberIdentifier in members)
{
if (!_knownMembers.TryGetValue(memberIdentifier, out var memberId))
throw new Exception($"Attempted to import switch with member identifier {memberIdentifier} but could not find an entry in the id map for this! :/");
await importer.StartRowAsync();
await importer.WriteAsync(justAddedSwitch.Id.Value, NpgsqlDbType.Integer);
await importer.WriteAsync(memberId.Value, NpgsqlDbType.Integer);
}
}
await importer.CompleteAsync();
}
}
public struct SwitchInfo
{
public Instant Timestamp;
/// <summary>
/// An ordered list of "member identifiers" matching with the identifier parameter passed to <see cref="BulkImporter.AddMember"/>.
/// </summary>
public IReadOnlyList<string> MemberIdentifiers;
}
public async ValueTask DisposeAsync() =>
await _tx.CommitAsync();
}
}

View File

@@ -0,0 +1,124 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Newtonsoft.Json.Linq;
using Autofac;
using Dapper;
using Serilog;
namespace PluralKit.Core
{
public partial class BulkImporter : IAsyncDisposable
{
private ILogger _logger { get; init; }
private ModelRepository _repo { get; init; }
private PKSystem _system { get; set; }
private IPKConnection _conn { get; init; }
private IPKTransaction _tx { get; init; }
private Func<string, Task> _confirmFunc { get; init; }
private readonly Dictionary<string, MemberId> _existingMemberHids = new();
private readonly Dictionary<string, MemberId> _existingMemberNames = new();
private readonly Dictionary<string, MemberId> _knownIdentifiers = new();
private ImportResultNew _result = new();
internal static async Task<ImportResultNew> PerformImport(IPKConnection conn, IPKTransaction tx, ModelRepository repo, ILogger logger,
ulong userId, PKSystem? system, JObject importFile, Func<string, Task> confirmFunc)
{
await using var importer = new BulkImporter()
{
_logger = logger,
_repo = repo,
_system = system,
_conn = conn,
_tx = tx,
_confirmFunc = confirmFunc,
};
if (system == null) {
system = await repo.CreateSystem(conn, null, tx);
await repo.AddAccount(conn, system.Id, userId);
importer._result.CreatedSystem = system.Hid;
importer._system = system;
}
// Fetch all members in the system and log their names and hids
var members = await conn.QueryAsync<PKMember>("select id, hid, name from members where system = @System",
new {System = system.Id});
foreach (var m in members)
{
importer._existingMemberHids[m.Hid] = m.Id;
importer._existingMemberNames[m.Name] = m.Id;
}
try
{
if (importFile.ContainsKey("tuppers"))
await importer.ImportTupperbox(importFile);
else if (importFile.ContainsKey("switches"))
await importer.ImportPluralKit(importFile);
else
throw new ImportException("File type is unknown.");
importer._result.Success = true;
await tx.CommitAsync();
}
catch (ImportException e)
{
importer._result.Success = false;
importer._result.Message = e.Message;
}
catch (ArgumentNullException)
{
importer._result.Success = false;
}
return importer._result;
}
private (MemberId?, bool) TryGetExistingMember(string hid, string name)
{
if (_existingMemberHids.TryGetValue(hid, out var byHid)) return (byHid, true);
if (_existingMemberNames.TryGetValue(name, out var byName)) return (byName, false);
return (null, false);
}
private async Task AssertLimitNotReached(int newMembers)
{
var memberLimit = _system.MemberLimitOverride ?? Limits.MaxMemberCount;
var existingMembers = await _repo.GetSystemMemberCount(_conn, _system.Id);
if (existingMembers + newMembers > memberLimit)
throw new ImportException($"Import would exceed the maximum number of members ({memberLimit}).");
}
public async ValueTask DisposeAsync()
{
// try rolling back the transaction
// this will throw if the transaction was committed, but that's fine
// so we just catch InvalidOperationException
try
{
await _tx.RollbackAsync();
}
catch (InvalidOperationException) {}
}
private class ImportException : Exception {
public ImportException(string Message) : base(Message) {}
}
}
public record ImportResultNew
{
public int Added = 0;
public int Modified = 0;
public bool Success;
public string? CreatedSystem;
public string? Message;
}
}

View File

@@ -0,0 +1,169 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;
using System.Threading.Tasks;
using Dapper;
using Newtonsoft.Json.Linq;
using NodaTime;
using NpgsqlTypes;
namespace PluralKit.Core
{
public partial class BulkImporter
{
private async Task<ImportResultNew> ImportPluralKit(JObject importFile)
{
var patch = SystemPatch.FromJSON(importFile);
try
{
patch.AssertIsValid();
}
catch (ValidationError e)
{
throw new ImportException($"Field {e.Message} in export file is invalid.");
}
await _repo.UpdateSystem(_conn, _system.Id, patch, _tx);
var members = importFile.Value<JArray>("members");
var switches = importFile.Value<JArray>("switches");
var newMembers = members.Count(m => {
var (found, _) = TryGetExistingMember(m.Value<string>("id"), m.Value<string>("name"));
return found == null;
});
await AssertLimitNotReached(newMembers);
foreach (JObject member in members)
await ImportMember(member);
if (switches.Any(sw => sw.Value<JArray>("members").Any(m => !_knownIdentifiers.ContainsKey((string) m))))
throw new ImportException("One or more switches include members that haven't been imported.");
await ImportSwitches(switches);
return _result;
}
private async Task ImportMember(JObject member)
{
var id = member.Value<string>("id");
var name = member.Value<string>("name");
var (found, isHidExisting) = TryGetExistingMember(id, name);
var isNewMember = found == null;
var referenceName = isHidExisting ? id : name;
if (isNewMember)
_result.Added++;
else
_result.Modified++;
_logger.Debug(
"Importing member with identifier {FileId} to system {System} (is creating new member? {IsCreatingNewMember})",
referenceName, _system.Id, isNewMember
);
var patch = MemberPatch.FromJSON(member);
try
{
patch.AssertIsValid();
}
catch (FieldTooLongError e)
{
throw new ImportException($"Field {e.Name} in member {referenceName} is too long ({e.ActualLength} > {e.MaxLength}).");
}
catch (ValidationError e)
{
throw new ImportException($"Field {e.Message} in member {referenceName} is invalid.");
}
MemberId? memberId = found;
if (isNewMember)
{
var newMember = await _repo.CreateMember(_conn, _system.Id, patch.Name.Value, _tx);
memberId = newMember.Id;
}
_knownIdentifiers[id] = memberId.Value;
await _repo.UpdateMember(_conn, memberId.Value, patch, _tx);
}
private async Task ImportSwitches(JArray switches)
{
var existingSwitches = (await _conn.QueryAsync<PKSwitch>("select * from switches where system = @System", new {System = _system.Id})).ToList();
var existingTimestamps = existingSwitches.Select(sw => sw.Timestamp).ToImmutableHashSet();
var lastSwitchId = existingSwitches.Count != 0 ? existingSwitches.Select(sw => sw.Id).Max() : (SwitchId?) null;
if (switches.Count > 10000)
throw new ImportException($"Too many switches present in import file.");
// Import switch definitions
var importedSwitches = new Dictionary<Instant, JArray>();
await using (var importer = _conn.BeginBinaryImport("copy switches (system, timestamp) from stdin (format binary)"))
{
foreach (var sw in switches)
{
var timestampString = sw.Value<string>("timestamp");
var timestamp = DateTimeFormats.TimestampExportFormat.Parse(timestampString);
if (!timestamp.Success) throw new ImportException($"Switch timestamp {timestampString} is not an valid timestamp.");
// Don't import duplicate switches
if (existingTimestamps.Contains(timestamp.Value)) continue;
// Otherwise, write to importer
await importer.StartRowAsync();
await importer.WriteAsync(_system.Id.Value, NpgsqlDbType.Integer);
await importer.WriteAsync(timestamp.Value, NpgsqlDbType.Timestamp);
var members = sw.Value<JArray>("members");
if (members.Count > Limits.MaxSwitchMemberCount)
throw new ImportException($"Switch with timestamp {timestampString} contains too many members ({members.Count} > 100).");
// Note that we've imported a switch with this timestamp
importedSwitches[timestamp.Value] = sw.Value<JArray>("members");
}
// Commit the import
await importer.CompleteAsync();
}
// Now, fetch all the switches we just added (so, now we get their IDs too)
// IDs are sequential, so any ID in this system, with a switch ID > the last max, will be one we just added
var justAddedSwitches = await _conn.QueryAsync<PKSwitch>(
"select * from switches where system = @System and id > @LastSwitchId",
new {System = _system.Id, LastSwitchId = lastSwitchId?.Value ?? -1});
// Lastly, import the switch members
await using (var importer = _conn.BeginBinaryImport("copy switch_members (switch, member) from stdin (format binary)"))
{
foreach (var justAddedSwitch in justAddedSwitches)
{
if (!importedSwitches.TryGetValue(justAddedSwitch.Timestamp, out var switchMembers))
throw new Exception($"Found 'just-added' switch (by ID) with timestamp {justAddedSwitch.Timestamp}, but this did not correspond to a timestamp we just added a switch entry of! :/");
// We still assume timestamps are unique and non-duplicate, so:
foreach (var memberIdentifier in switchMembers)
{
if (!_knownIdentifiers.TryGetValue((string) memberIdentifier, out var memberId))
throw new Exception($"Attempted to import switch with member identifier {memberIdentifier} but could not find an entry in the id map for this! :/");
await importer.StartRowAsync();
await importer.WriteAsync(justAddedSwitch.Id.Value, NpgsqlDbType.Integer);
await importer.WriteAsync(memberId.Value, NpgsqlDbType.Integer);
}
}
await importer.CompleteAsync();
}
}
}
}

View File

@@ -0,0 +1,122 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Newtonsoft.Json.Linq;
using NodaTime;
namespace PluralKit.Core
{
public partial class BulkImporter
{
private async Task<ImportResultNew> ImportTupperbox(JObject importFile)
{
var tuppers = importFile.Value<JArray>("tuppers");
var newMembers = tuppers.Count(t => !_existingMemberNames.TryGetValue("name", out var memberId));
await AssertLimitNotReached(newMembers);
string lastSetTag = null;
bool multipleTags = false;
bool hasGroup = false;
foreach (JObject tupper in tuppers)
(lastSetTag, multipleTags, hasGroup) = await ImportTupper(tupper, lastSetTag);
if (multipleTags || hasGroup)
{
var issueStr =
$"{Emojis.Warn} The following potential issues were detected converting your Tupperbox input file:";
if (hasGroup)
issueStr +=
"\n- PluralKit does not support member groups. Members will be imported without groups.";
if (multipleTags)
issueStr +=
"\n- PluralKit does not support per-member system tags. Since you had multiple members with distinct tags, those tags will be applied to the members' *display names*/nicknames instead.";
await _confirmFunc(issueStr);
_result.Success = true;
}
return _result;
}
private async Task<(string lastSetTag, bool multipleTags, bool hasGroup)> ImportTupper(JObject tupper, string lastSetTag)
{
if (!tupper.ContainsKey("name") || tupper["name"].Type == JTokenType.Null)
throw new ImportException("Field 'name' cannot be null.");
var hasGroup = tupper.ContainsKey("group_id") && tupper["group_id"].Type != JTokenType.Null;
var multipleTags = false;
var name = tupper.Value<string>("name");
var patch = new MemberPatch();
patch.Name = name;
if (tupper.ContainsKey("avatar_url") && tupper["avatar_url"].Type != JTokenType.Null) patch.AvatarUrl = tupper.Value<string>("avatar_url").NullIfEmpty();
if (tupper.ContainsKey("brackets"))
{
var brackets = tupper.Value<JArray>("brackets");
if (brackets.Count % 2 != 0)
throw new ImportException($"Field 'brackets' in tupper {name} is invalid.");
var tags = new List<ProxyTag>();
for (var i = 0; i < brackets.Count / 2; i++)
tags.Add(new ProxyTag((string) brackets[i * 2], (string) brackets[i * 2 + 1]));
patch.ProxyTags = tags.ToArray();
}
// todo: && if is new member
if (tupper.ContainsKey("posts")) patch.MessageCount = tupper.Value<int>("posts");
if (tupper.ContainsKey("show_brackets")) patch.KeepProxy = tupper.Value<bool>("show_brackets");
if (tupper.ContainsKey("birthday") && tupper["birthday"].Type != JTokenType.Null)
{
var parsed = DateTimeFormats.TimestampExportFormat.Parse(tupper.Value<string>("birthday"));
if (!parsed.Success)
throw new ImportException($"Field 'birthday' in tupper {name} is invalid.");
patch.Birthday = LocalDate.FromDateTime(parsed.Value.ToDateTimeUtc());
}
if (tupper.ContainsKey("description")) patch.Description = tupper.Value<string>("description");
if (tupper.ContainsKey("tag") && tupper["tag"].Type != JTokenType.Null)
{
var tag = tupper.Value<string>("tag");
if (tag != lastSetTag)
{
lastSetTag = tag;
multipleTags = true;
}
patch.DisplayName = $"{name} {tag}";
}
var isNewMember = false;
if (!_existingMemberNames.TryGetValue(name, out var memberId))
{
var newMember = await _repo.CreateMember(_conn, _system.Id, name, _tx);
memberId = newMember.Id;
isNewMember = true;
_result.Added++;
}
else
_result.Modified++;
_logger.Debug("Importing member with identifier {FileId} to system {System} (is creating new member? {IsCreatingNewMember})",
name, _system.Id, isNewMember);
try
{
patch.AssertIsValid();
}
catch (FieldTooLongError e)
{
throw new ImportException($"Field {e.Name} in tupper {name} is too long ({e.ActualLength} > {e.MaxLength}).");
}
catch (ValidationError e)
{
throw new ImportException($"Field {e.Message} in tupper {name} is invalid.");
}
await _repo.UpdateMember(_conn, memberId, patch, _tx);
return (lastSetTag, multipleTags, hasGroup);
}
}
}

View File

@@ -1,19 +0,0 @@
using System;
namespace PluralKit.Core
{
internal static class JsonUtils
{
public static string BoundsCheckField(this string input, int maxLength, string nameInError)
{
if (input != null && input.Length > maxLength)
throw new JsonModelParseError($"{nameInError} too long ({input.Length} > {maxLength}).");
return input;
}
}
public class JsonModelParseError: Exception
{
public JsonModelParseError(string message): base(message) { }
}
}