Suffix Arrayの計算
LarssonSadakane法で接尾辞配列を求めるプログラム。
基本的にqsufsort.cそのまんま。
実行結果
10 : a 7 : abra 0 : abracadabra 3 : acadabra 5 : adabra 8 : bra 1 : bracadabra 4 : cadabra 6 : dabra 9 : ra 2 : racadabra
Program.cs
using System; namespace Kadzus { class Program { static void Main(string[] args) { var str = "abracadabra"; var sa = LarssonSadakane.GetSuffixArray(str); for(int i = 0; i < str.Length; i++) { Console.WriteLine(sa[i].ToString().PadLeft(2) + " : " + str.Substring(sa[i])); } Console.ReadLine(); } } }
LarssonSadakane.cs
using System; namespace Kadzus { public class LarssonSadakane { public static int[] GetSuffixArray(string str) { int[] input = new int[str.Length + 1]; for (int i = str.Length - 1; i >= 0; i--) input[i] = str[i]; return GetSuffixArray(input, 0, str.Length, false); } public static int[] GetSuffixArray(int[] input, int start, int length, bool avoid_side_effect = false) { int[] I = new int[length + 1]; int[] V; int r; int h; if (avoid_side_effect) { V = new int[length + 1]; Array.Copy(input, start, V, 0, length); start = 0; } else V = input; int max = input[start]; int min = max; for (int i = length - 2, index = start + 1; i >= 0; i--) { int v = input[index++]; if (v > max) max = v; if (v < min) min = v; } SuffixSort(V, I, out h, out r, start, length, max + 1, min); return I; } private static void SuffixSort(int[] V, int[] I, out int h, out int r, int start, int n, int k, int l) { int pi, pk; int i, j, s, sl; if (n >= k - l) { j = transform(V, I, out r, start, n, k, l, n); bucketsort(V, I, start, n, j); } else { transform(V, I, out r, start, n, k, l, int.MaxValue); for (i = 0; i <= n; ++i) I[i] = i; h = 0; sort_split(V, I, ref h, start, 0, n + 1); } h = r; while (I[0] >= -n) { pi = sl = 0; do { if ((s = I[pi]) < 0) { pi -= s; sl += s; } else { if (sl != 0) { I[pi + sl] = sl; sl = 0; } pk = V[start + s] + 1; sort_split(V, I, ref h, start, pi, pk - pi); pi = pk; } } while (pi <= n); if (sl != 0) I[pi + sl] = sl; h *= 2; } for (i = 0; i <= n; i++) if (V[start + i] > 0) I[V[start + i] - 1] = i; } private static void sort_split(int[] V, int[] I, ref int h, int start, int p, int n) { int pa, pb, pc, pd, pl, pm, pn; int f, v, s, t; if (n < 7) { select_sort_split(V, I, ref h, start, p, n); return; } v = choose_pivot(V, I, ref h, start, p, n); pa = pb = p; pc = pd = p + n - 1; while (true) { while (pb <= pc && (f = V[start + I[pb] + h]) <= v) { if (f == v) { int tmp = I[pa]; I[pa] = I[pb]; I[pb] = tmp; pa++; } pb++; } while (pc >= pb && (f = V[start + I[pc] + h]) >= v) { if (f == v) { int tmp = I[pc]; I[pc] = I[pd]; I[pd] = tmp; pd--; } pc--; } if (pb > pc) break; int tmp2 = I[pb]; I[pb] = I[pc]; I[pc] = tmp2; pb++; pc--; } pn = p + n; if ((s = pa - p) > (t = pb - pa)) s = t; for (pl = p, pm = pb - s; s != 0; s--, pl++, pm++) { int tmp = I[pl]; I[pl] = I[pm]; I[pm] = tmp; } if ((s = pd - pc) > (t = pn - pd - 1)) s = t; for (pl = pb, pm = pn - s; s != 0; s--, pl++, pm++) { int tmp = I[pl]; I[pl] = I[pm]; I[pm] = tmp; } s = pb - pa; t = pd - pc; if (s > 0) sort_split(V, I, ref h, start, p, s); update_group(V, I, start, p + s, p + n - t - 1); if (t > 0) sort_split(V, I, ref h, start, p + n - t, t); } private static void update_group(int[] V, int[] I, int start, int pl, int pm) { int g = pm; V[start + I[pl]] = g; if (pl == pm) I[pl] = -1; else { do { V[start + I[++pl]] = g; } while (pl < pm); } } private static int choose_pivot(int[] V, int[] I, ref int h, int start, int p, int n) { int pl, pm, pn; int s; pm = p + (n >> 1); if (n > 7) { pl = p; pn = p + n - 1; if (n > 40) { s = n >> 3; pl = V[start + I[pl] + h] < V[start + I[pl + s] + h] ? (V[start + I[pl + s] + h] < V[start + I[pl + s + s] + h] ? pl + s : V[start + I[pl] + h] < V[start + I[pl + s + s] + h] ? pl + s + s : pl) : (V[start + I[pl + s] + h] > V[start + I[pl + s + s] + h] ? pl + s : V[start + I[pl] + h] > V[start + I[pl + s + s] + h] ? pl + s + s : pl); pm = V[start + I[pm - s] + h] < V[start + I[pm] + h] ? (V[start + I[pm] + h] < V[start + I[pm + s] + h] ? pm : V[start + I[pm - s] + h] < V[start + I[pm + s] + h] ? pm + s : pm - s) : (V[start + I[pm] + h] > V[start + I[pm + s] + h] ? pm : V[start + I[pm - s] + h] > V[start + I[pm + s] + h] ? pm + s : pm - s); pn = V[start + I[pn - s - s] + h] < V[start + I[pn - s] + h] ? (V[start + I[pn - s] + h] < V[start + I[pn] + h] ? pn - s : V[start + I[pn - s - s] + h] < V[start + I[pn] + h] ? pn : pn - s - s) : (V[start + I[pn - s] + h] > V[start + I[pn] + h] ? pn - s : V[start + I[pn - s - s] + h] > V[start + I[pn] + h] ? pn : pn - s - s); } pm = V[start + I[pl] + h] < V[start + I[pm] + h] ? (V[start + I[pm] + h] < V[start + I[pn] + h] ? pm : V[start + I[pl] + h] < V[start + I[pn] + h] ? pn : pl) : (V[start + I[pm] + h] > V[start + I[pn] + h] ? pm : V[start + I[pl] + h] > V[start + I[pn] + h] ? pn : pl); } return V[start + I[pm] + h]; } private static void select_sort_split(int[] V, int[] I, ref int h, int start, int p, int n) { int pa, pb, pi, pn; int f, v; pa = p; pn = p + n - 1; while (pa < pn) { for (pi = pb = pa + 1, f = V[start + I[pa] + h]; pi <= pn; pi++) { if ((v = V[start + I[pi] + h]) < f) { f = v; int tmp = I[pi]; I[pi] = I[pa]; I[pa] = tmp; pb = pa + 1; } else if (v == f) { int tmp = I[pi]; I[pi] = I[pb]; I[pb] = tmp; pb++; } } update_group(V, I, start, pa, pb - 1); pa = pb; } if (pa == pn) { V[start + I[pa]] = pa; I[pa] = -1; } } private static void bucketsort(int[] V, int[] I, int start, int n, int k) { int pi; int i, c, d, g; for (pi = 0; pi < k; pi++) I[pi] = -1; for (i = 0; i <= n; ++i) { V[start + i] = I[c = V[start + i]]; I[c] = i; } for (pi = k - 1, i = n; pi >= 0; pi--) { d = V[start + (c = I[pi])]; V[start + c] = g = i; if (d >= 0) { I[i--] = c; do { d = V[start + (c = d)]; V[start + c] = g; I[i--] = c; } while (d >= 0); } else I[i--] = -1; } } private static int transform(int[] V, int[] I, out int r, int start, int n, int k, int l, int q) { int b, c, d, e, i, j, m, s; int pi, pj; for (s = 0, i = k - l; i != 0; s++, i >>= 1); e = int.MaxValue >> s; for (b = d = r = 0; r < n && d <= e && (c = d << s | (k - l)) <= q; r++) { b = b << s | (V[start + r] - l + 1); d = c; } m = (1 << (r - 1) * s) - 1; V[start + n] = l - 1; if (d <= n) { for (pi = 0; pi <= d; pi++) I[pi] = 0; for (pi = r, c = b; pi <= n; pi++) { I[c] = 1; c = (c & m) << s | (V[start + pi] - l + 1); } for (i = 1; i < r; i++) { I[c] = 1; c = (c & m) << s; } for (pi = 0, j = 1; pi <= d; pi++) if (I[pi] != 0) I[pi] = j++; for (pi = 0, pj = r, c = b; pj <= n; pi++, pj++) { V[start + pi] = I[c]; c = (c & m) << s | (V[start + pj] - l + 1); } while (pi < n) { V[start + pi++] = I[c]; c = (c & m) << s; } } else { for (pi = 0, pj = r, c = b; pj <= n; pi++, pj++) { V[start + pi] = c; c = (c & m) << s | (V[start + pj] - l + 1); } while (pi < n) { V[start + pi++] = c; c = (c & m) << s; } j = d + 1; } V[start + n] = 0; return j; } } }