[Add] MasterData Services.
This commit is contained in:
231
AMREZ.EOP.Domain/Shared/Data/ThaiAddressParser.cs
Normal file
231
AMREZ.EOP.Domain/Shared/Data/ThaiAddressParser.cs
Normal file
@@ -0,0 +1,231 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace AMREZ.EOP.Domain.Shared.Data;
|
||||
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
/// <summary>
/// Result of <c>ThaiAddressParser.Parse</c>: the components extracted from one free-form address string.
/// Components that could not be extracted are empty strings.
/// </summary>
/// <param name="Name">Recipient name taken from the first line of the input.</param>
/// <param name="Phone">Phone number as digits only.</param>
/// <param name="AddressOnly">Full address text, excluding the name and the phone number.</param>
/// <param name="AddressMain">Leading part of the address, e.g. house number / moo / soi / road.</param>
/// <param name="Subdistrict">Subdistrict (tambon / khwaeng).</param>
/// <param name="District">District (amphoe / khet).</param>
/// <param name="Province">Province.</param>
/// <param name="PostalCode">Five-digit postal code.</param>
public record ParsedAddress(
    string Name,
    string Phone,
    string AddressOnly,
    string AddressMain,
    string Subdistrict,
    string District,
    string Province,
    string PostalCode
);
|
||||
|
||||
/// <summary>
/// Pattern-based parser that splits a free-form Thai address string into name, phone number,
/// address text, subdistrict, district, province and postal code.
/// </summary>
/// <remarks>
/// Every rule is a regular-expression heuristic; nothing is validated against a list of real
/// place names, so the result is best-effort.
/// </remarks>
public static class ThaiAddressParser
{
    private const RegexOptions SharedOptions = RegexOptions.Compiled | RegexOptions.CultureInvariant;

    // District value that gets the province name appended ("เมือง" + "ลำพูน" => "เมืองลำพูน").
    private const string MunicipalDistrict = "เมือง";

    private static readonly char[] NameEdgeChars = { ' ', ':', '-' };
    private static readonly char[] NameEdgeCharsWithParens = { ' ', ':', '-', '(', ')' };

    // Runs of spaces/tabs only; newlines are kept because the first line carries the name.
    private static readonly Regex HorizontalWhitespace = new("[ \\t]+", SharedOptions);

    private static readonly Regex PaddedLineBreak = new("\\s*\\n\\s*", SharedOptions);

    private static readonly Regex PhoneCandidate = new(@"0\d{8,9}", SharedOptions);

    // Phone labels ("โทรศัพท์", "โทร", "Tel", "เบอร์") plus an optional colon.
    // - "tel" must not be part of a Latin word, so "Hotel" stays intact.
    // - Only spaces/tabs are consumed around the colon so the line break after the label survives.
    private static readonly Regex PhoneLabel = new(
        @"(?:โทรศัพท์|โทร\.?|(?<![A-Za-z])(?i:tel)(?![A-Za-z])\.?|เบอร์)[ \t]*:?[ \t]*",
        SharedOptions);

    private static readonly Regex FirstDigit = new(@"\d", SharedOptions);

    // Words that mark the start of the address ("address" / "shop") inside the first line.
    private static readonly Regex AddressKeyword = new("(ที่อยู่|ร้าน)", SharedOptions);

    private static readonly Regex TrailingParenthetical = new(@"\([^0-9)]*\)$", SharedOptions);

    private static readonly Regex RepeatedWhitespace = new(@"\s{2,}", SharedOptions);

    private static readonly Regex EmptyParentheses = new(@"\(\s*\)", SharedOptions);

    // Exactly five digits that are not part of a longer digit run.
    private static readonly Regex StandalonePostalCode = new(@"(?<!\d)\d{5}(?!\d)", SharedOptions);

    private static readonly Regex SubdistrictPattern = new(
        @"ต\. ?(?<t1>[^ \d]+)|ตำบล ?(?<t2>[^ \d]+)|แขวง ?(?<t3>[^ \d]+)",
        SharedOptions);

    private static readonly Regex DistrictPattern = new(
        @"อ\. ?(?<d1>[^ \d]+)|อำเภอ ?(?<d2>[^ \d]+)|เขต ?(?<d3>[^ \d]+)",
        SharedOptions);

    // Bangkok spellings; the lookahead keeps names such as "กรุงเทพกรีฑา" from counting as the province.
    private static readonly Regex BangkokAlias = new(
        @"กรุงเทพมหานคร|กรุงเทพฯ|กทม\.?|กรุงเทพ(?![กษ])",
        SharedOptions);

    private static readonly Regex ExplicitProvince = new(
        @"จ\. ?(?<p1>[^0-9 ]+)|จังหวัด ?(?<p2>[^0-9 ]+)",
        SharedOptions);

    // Province text to strip from the main address. A dot that trails a Bangkok alias
    // (e.g. "กรุงเทพ .") is removed together with the alias.
    private static readonly Regex ProvinceSegment = new(
        @"(?:กรุงเทพมหานคร|กรุงเทพฯ|กทม|กรุงเทพ(?![กษ]))(?:\s*\.)?|จ\. ?[^0-9 ]+|จังหวัด ?[^0-9 ]+",
        SharedOptions);

    private static readonly Regex LocalitySegment = new(
        @"(ต\. ?[^ \d]+|ตำบล ?[^ \d]+|แขวง ?[^ \d]+|อ\. ?[^ \d]+|อำเภอ ?[^ \d]+|เขต ?[^ \d]+)",
        SharedOptions);

    /// <summary>
    /// Parses a raw address string into its components.
    /// </summary>
    /// <param name="raw">Free-form text that may contain a name, a phone number and an address.</param>
    /// <returns>
    /// The extracted components. Every component is an empty string when <paramref name="raw"/> is
    /// <c>null</c>, empty or whitespace, and individual components are empty when no rule matched.
    /// </returns>
    public static ParsedAddress Parse(string? raw)
    {
        if (string.IsNullOrWhiteSpace(raw))
        {
            return new ParsedAddress("", "", "", "", "", "", "", "");
        }

        var normalized = Normalize(raw);

        var phone = FindPhone(raw);
        var withoutPhone = phone.Length > 0 ? RemovePhone(normalized, phone) : normalized;

        var lines = withoutPhone.Split(
            '\n',
            StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);

        var name = ExtractName(lines.Length > 0 ? lines[0] : string.Empty);

        // Full address text: all lines joined, minus the leading name.
        var addrText = string.Join(" ", lines);
        if (!string.IsNullOrWhiteSpace(name) && addrText.StartsWith(name, StringComparison.Ordinal))
        {
            // Ordinal comparison guarantees that name.Length is the real prefix length.
            addrText = addrText[name.Length..].Trim(' ', ',', ':', '-');
        }

        addrText = RepeatedWhitespace.Replace(addrText, " ");

        // Drop empty "()" pairs, e.g. what remains after a parenthesised phone number was removed.
        addrText = EmptyParentheses.Replace(addrText, "").Trim();

        // The last standalone five-digit number is taken as the postal code.
        var postalMatches = StandalonePostalCode.Matches(addrText);
        var postal = postalMatches.Count > 0 ? postalMatches[^1].Value : "";

        var subdistrict = FirstCapture(SubdistrictPattern, addrText, "t1", "t2", "t3");
        var district = FirstCapture(DistrictPattern, addrText, "d1", "d2", "d3");

        var province = BangkokAlias.IsMatch(addrText)
            ? "กรุงเทพมหานคร"
            : FirstCapture(ExplicitProvince, addrText, "p1", "p2");

        // Special rule: a bare "เมือง" district is completed with the province name.
        if (district == MunicipalDistrict && !string.IsNullOrWhiteSpace(province))
        {
            district = $"{MunicipalDistrict}{province}";
        }

        // Main address = address text minus postal code, province, subdistrict and district.
        var mainAddress = addrText;
        if (postal.Length > 0)
        {
            // Only standalone occurrences are removed so longer numbers containing the same digits survive.
            mainAddress = StandalonePostalCode
                .Replace(mainAddress, m => m.Value == postal ? "" : m.Value)
                .Trim();
        }

        mainAddress = ProvinceSegment.Replace(mainAddress, "");
        mainAddress = LocalitySegment.Replace(mainAddress, "");
        mainAddress = RepeatedWhitespace.Replace(mainAddress, " ").Trim(' ', ',', '.');

        return new ParsedAddress(
            Name: name,
            Phone: phone,
            AddressOnly: addrText,
            AddressMain: mainAddress,
            Subdistrict: subdistrict,
            District: district,
            Province: province,
            PostalCode: postal);
    }

    // Replaces decorative emoji with spaces, collapses space/tab runs and tidies line breaks.
    private static string Normalize(string raw)
    {
        // Emoji are replaced before collapsing so the inserted spaces cannot form new runs.
        var text = raw
            .Replace("\U0001F3E0", " ")   // house emoji
            .Replace("\u260E\uFE0F", " ") // telephone emoji with variation selector
            .Replace("\u260E", " ")       // telephone sign without variation selector
            .Replace("\U0001F4CD", " ");  // pin emoji

        text = HorizontalWhitespace.Replace(text, " ");
        return PaddedLineBreak.Replace(text.Trim(), "\n");
    }

    // Returns the last phone-like digit run (dashes ignored) or "" when there is none.
    private static string FindPhone(string raw)
    {
        var candidates = PhoneCandidate.Matches(raw.Replace("-", ""));

        // The phone number usually comes last, so the final candidate wins.
        return candidates.Count > 0 ? candidates[^1].Value : "";
    }

    // Removes the detected phone number (with optional "-" or space between digits) and phone labels.
    private static string RemovePhone(string text, string phoneDigits)
    {
        // The pattern consists of digits and a fixed separator class, so no escaping is needed.
        var phonePattern = string.Join("[- ]?", phoneDigits.ToCharArray());
        var withoutDigits = Regex.Replace(text, phonePattern, "");
        return PhoneLabel.Replace(withoutDigits, "");
    }

    // Takes the part of the first line before the first digit or address keyword as the name.
    private static string ExtractName(string firstLine)
    {
        var end = firstLine.Length;

        var digit = FirstDigit.Match(firstLine);
        if (digit.Success)
        {
            end = Math.Min(end, digit.Index);
        }

        var keyword = AddressKeyword.Match(firstLine);
        if (keyword.Success)
        {
            end = Math.Min(end, keyword.Index);
        }

        var candidate = firstLine[..end].Trim(NameEdgeChars);

        // Strip a trailing "(note)" while its closing parenthesis is still present,
        // but keep it when the whole name is parenthesised.
        var withoutNote = TrailingParenthetical.Replace(candidate, "");
        if (withoutNote.Trim(NameEdgeCharsWithParens).Length > 0)
        {
            candidate = withoutNote;
        }

        return candidate.Trim(NameEdgeCharsWithParens).Trim();
    }

    // Returns the first non-empty named capture of the first match, without surrounding punctuation.
    private static string FirstCapture(Regex pattern, string text, params string[] groupNames)
    {
        var match = pattern.Match(text);
        if (!match.Success)
        {
            return "";
        }

        foreach (var groupName in groupNames)
        {
            var group = match.Groups[groupName];
            if (!group.Success)
            {
                continue;
            }

            // Captures run to the next space, so a trailing "," or "." may be attached.
            var value = group.Value.Trim().Trim(',', '.', ';');
            if (value.Length > 0)
            {
                return value;
            }
        }

        return "";
    }
}
|
||||
Reference in New Issue
Block a user