lcnetdev · scossu · Jan 9, 2026 · Jan 14, 2026 · Jan 30, 2026
diff --git a/scriptshifter/tables/data/_ignore_base.yml b/scriptshifter/tables/data/_ignore_base.yml
@@ -8,16 +8,24 @@ general:
 roman_to_script:
   ignore:
     - "At head of title"
-    - "at head of title"
     - "Colophon"
-    - "colophon"
+    - "Colophon"
     - "Cover title"
+    - "On cover"
+    - "S.l."
+    - "Spine title"
+    - "and one other"
+    - "at head of title"
+    - "colophon"
+    - "cover title"
     - "date of publication not identified"
+    - "et al."
+    - "on cover"
     - "place of publication not identified"
     - "publisher not identified"
-    - "and one other"
-    - "and others"
-    - "et al."
+    - "s.l."
+    - "s.n."
+    - "spine title"
   ignore_ptn:
     - "and ([a-z0-9]+ )?others"
 
@@ -29,17 +37,22 @@ roman_to_script:
     # dedicated U+2160÷U+216F (uppercase Roman
     # numerals) and/or U+2170÷U+217F (lower case Roman
     # numerals) ranges to avoid this ambiguity.
-    - "I{2,3}\\b"
-    - "I(V|X)\\b"
-    - "LI{,3}\\b"
-    - "LI?(V|X)\\b"
-    - "L(V|X{1,3})I{,3}\\b"
-    - "LX{1,3}I?V\\b"
-    - "LX{1,3}VI{,3}\\b"
-    - "VI{1,3}\\b"
-    - "X{1,3}I{1,3}\\b"
-    - "X{1,3}I(V|X)\\b"
-    - "X{1,3}VI{,3}\\b"
+    - "M{,3}(CM)?C?D?C{1,3}L?X{,3}I{,3}\\b"
+    - "M{1,3}(CM)?C?D?C{,3}L?X{,3}I{,3}\\b"
+    - "M{,3}(CM)?C?D?C{1,3}L?X{,3}I[VX]\\b"
+    - "M{1,3}(CM)?C?D?C{,3}L?X{,3}I[VX]\\b"
+
+    # May not be prefixed by M, D, C, L. Cannot use for single digits.
+    - "M{,3}(CM)?C?D?C{,3}I(I{1,2}|V|X)\\b"  # II, III, IV, IX; a lone "I" is deliberately not matched
+    - "M{,3}(CM)?C?D?C{,3}LI{1,3}\\b"
+    - "M{,3}(CM)?C?D?C{,3}LI?[VX]\\b"
+    - "M{,3}(CM)?C?D?C{,3}L(V|X{1,3})I{,3}\\b"
+    - "M{,3}(CM)?C?D?C{,3}LX{1,3}I?[VX]\\b"
+    - "M{,3}(CM)?C?D?C{,3}LX{1,3}VI{,3}\\b"
+    - "M{,3}(CM)?C?D?C{,3}VI{1,3}\\b"
+    - "M{,3}(CM)?C?D?C{,3}X{1,3}C?I{1,3}\\b"
+    - "M{,3}(CM)?C?D?C{,3}X{1,3}C?I[VX]\\b"
+    - "M{,3}(CM)?C?D?C{,3}X{1,3}C?VI{,3}\\b"
 
     # MARC sub-field markers.
     - "[\u2021\u01C2\\$][0-9a-z]\\b"
diff --git a/scriptshifter/tables/data/arabic 2025-12-01.yml b/scriptshifter/tables/data/arabic 2025-12-01.yml
@@ -0,0 +1,327 @@
+# Arabic S2R using the 3rd-party ArabicTransliterator library:
+# https://github.com/MTG/ArabicTransliterator
+
+---
+general:
+  name: Arabic
+  parents:
+    - _ignore_base  # parent table; _ignore_base.yml holds the shared ignore / ignore_ptn entries
+  description: >
+    Version 1.0 (2025-11-29) - Arabic language R2S using a conversion table; S2R using a 3rd party library.
+  case_sensitive: false  # NOTE(review): the map below still has capitalized keys "J" and "Kh" - confirm they are needed
+
+roman_to_script:
+  map:
+
+    # Punctuation marks:
+    "*": "\u066D"
+    ",": "\u060C"
+    ";": "\u061B"
+    "?": "\u061F"
+
+    # Exceptions for specific words
+
+    # Allah
+    "%alla\u0304h%": "\uFDF2"
+    "alla\u0304h": "\u0627\u0644\u0644\u0647"
+
+    # Qur'an
+    "qur\u02BCa\u0304n": "\u0642\u0631\u0622\u0646"
+
+    # lillah
+    "lilla\u0304h": "\u0644\u0644\u0647"
+
+    # billah
+    "billa\u0304h": "\u0628\u0644\u0644\u0647"
+
+    # Rahman
+    "rah\u0323ma\u0304n": "\u0631\u062D\u0645\u0646"
+
+    # Ruwat
+    "ruwa\u0304t": "\u0631\u0648\u0627\u0629"
+
+    # Hadha
+    "ha\u0304dha\u0304": "\u0647\u0630\u0627"
+
+    # Hadhihi
+    "ha\u0304dhi\u0304hi": "\u0647\u0630\u0647"
+
+    # dhalika
+    "dha\u0304lika": "\u0630\u0644\u0643"
+
+    # Ibn when it appears in the middle of a name sequence
+    "ibn": "\u0628\u0646"
+
+    # H[dot below]aya[macron]t
+    "h\u0323aya\u0304t": "\u062D\u064A\u0627\u0629"
+
+    # "sh[dot below]" as in "Ishaq"
+    "sh\u0323": "\u0633\u062D"
+
+    # "s[prime]h" combos
+    "s\u02B9h": "\u0633\u0647"
+
+    # "th[dot below]"
+    "th\u0323": "\u062A\u062D"
+
+    # "dh[dot below]"
+    "dh\u0323": "\u062F\u062D"
+
+    # La-hu
+    "la-hu": "\u0644\u0647"
+
+    # Mi'ah
+    "mi\u02BEah": "\u0645\u0627\u0626\u0629"
+    "mi\u02BCah": "\u0645\u0627\u0626\u0629"
+
+    # Mi'at
+    "mi\u02BEat": "\u0645\u0627\u0626\u0629"
+    "mi\u02BCat": "\u0645\u0627\u0626\u0629"
+
+    # Numbers (I have set these to Hindi numbers. Note that Persian and Urdu
+    # will technically use \u06F0-\u06F9. This needs further discussion with PSD
+    # as RLIN21 used Hindi numbers, whereas Connexion and Voyager do not.)
+
+    # Edition statements with Latin number
+    "al-t\u0323ab\u02BBah 1": "\u0627\u0644\u0637\u0628\u0639\u0629 1"
+    "al-t\u0323ab\u02BBah 2": "\u0627\u0644\u0637\u0628\u0639\u0629 2"
+    "al-t\u0323ab\u02BBah 3": "\u0627\u0644\u0637\u0628\u0639\u0629 3"
+    "al-t\u0323ab\u02BBah 4": "\u0627\u0644\u0637\u0628\u0639\u0629 4"
+    "al-t\u0323ab\u02BBah 5": "\u0627\u0644\u0637\u0628\u0639\u0629 5"
+    "al-t\u0323ab\u02BBah 6": "\u0627\u0644\u0637\u0628\u0639\u0629 6"
+    "al-t\u0323ab\u02BBah 7": "\u0627\u0644\u0637\u0628\u0639\u0629 7"
+    "al-t\u0323ab\u02BBah 8": "\u0627\u0644\u0637\u0628\u0639\u0629 8"
+    "al-t\u0323ab\u02BBah 9": "\u0627\u0644\u0637\u0628\u0639\u0629 9"
+
+    # Use Basic Arabic-Indic \u0660-\u0669
+    "0": "\u0660"
+    "1": "\u0661"
+    "2": "\u0662"
+    "3": "\u0663"
+    "4": "\u0664"
+    "5": "\u0665"
+    "6": "\u0666"
+    "7": "\u0667"
+    "8": "\u0668"
+    "9": "\u0669"
+
+    # Hyphenated prefixes:
+    "wa-": "\u0648"
+    "bi-": "\u0628"
+    "al-": "\u0627\u0644"
+    "lil-": "\u0644\u0644"
+    "li-": "\u0644"
+    "la\u0304-": "\u0644"
+    "fi\u0304-": "\u0641\u064A"
+    "ka-": "\u0643"
+
+    # Vowels and vowel/consonant combinations - ta-marbutah at end of word
+    "ah%": "\u0629"
+    "at%": "\u0629"
+
+    # tanwin at end of word
+    "an%": "\u0627"
+
+    # ayn-alif combo
+    "\u02BBa\u0304\u02BE%": "\u0639\u0627\u0621"
+    "\u02BBa\u0304\u02BC%": "\u0639\u0627\u0621"
+
+    "\u02BBa\u0304": "\u0639\u0627"
+
+    "\u02BBi\u0304y": "\u0639\u064A"
+    "\u02BBi\u0304": "\u0639\u064A"
+
+    "\u02BBu\u0304": "\u0639\u0648"
+    "\u02BBu": "\u0639"
+
+    "%\u02BBa": "\u0639"
+    # "\u02BBa%": "\u0639"
+
+    # alif and hamzas for all occasions
+
+    # truncation necessary? It seems to work fine with.
+
+    "i\u0304\u02BEah%": "\u064A\u0626\u0629"
+    "i\u0304\u02BCah%": "\u064A\u0626\u0629"
+
+    "i\u0304\u02BEat%": "\u064A\u0626\u0629"
+    "i\u0304\u02BCat%": "\u064A\u0626\u0629"
+
+    "i\u02BEa\u0304%": "\u0626\u0627"
+    "i\u02BCa\u0304%": "\u0626\u0627"
+
+    # "i\u02BE": "\u0626%"  # disabled: duplicate key, redefined in the "I" section below
+    # "i\u02BC": "\u0626%"  # disabled: duplicate key, redefined in the "I" section below
+    "a\u0304\u02BEa\u0304": "\u0627\u0621\u0627"
+    "a\u0304\u02BCa\u0304": "\u0627\u0621\u0627"
+
+    "a\u02BE": "\u0623"
+    "a\u02BC": "\u0623"
+    "\u02BEi": "\u0626"
+    "\u02BCi": "\u0626"
+    "\u02BEa\u0304": "\u0622"
+    "\u02BCa\u0304": "\u0622"
+    "\u02BEa": "\u0623"
+    "\u02BCa": "\u0623"
+
+    "y\u02BCah": "\u064A\u0626\u0629"
+    "y\u02BEah": "\u064A\u0626\u0629"
+
+    "y\u02BCat": "\u064A\u0626\u0629"
+    "y\u02BEat": "\u064A\u0626\u0629"
+
+    # A
+
+    "a\u0304\u02BCi\u0304": "\u0627\u0626\u064A"
+    "a\u0304\u02BEi\u0304": "\u0627\u0626\u064A"
+
+    "a\u0304\u02BCi": "\u0627\u0626"
+    "a\u0304\u02BEi": "\u0627\u0626"
+    "a\u0304\u02BC": "\u0627\u0621"
+    "a\u0304\u02BE": "\u0627\u0621"
+    "%a\u0304": "\u0622"
+    "a\u0304": "\u0627"
+
+    # These next two lines were intended to convert to alif-ayn when it is at
+    # the beginning of a word, definite or indefinite (i.e.
+    # al-a[ayn]ma[macron]l or [space]a[ayn]ma[macron]l).
+    "%a\u02BB": "\u0623\u0639"
+    "a\u02BB": "\u0639"
+    "a\u0301": "\u0649"
+
+    "ayy": "\u064A"
+    "%a": "\u0623"
+    "a": ""
+
+    # I - Capital I at beginning of word is usually alif hamzah-below.
+
+    "%i\u0304": "\u064A"
+    "i\u0304y": "\u064A"
+    "iy": "\u064A"
+    "i\u0304": "\u064A"
+    "%\u02BBi": "\u0639"
+
+    # "i\u02BB": "\u0625\u0639"
+
+    "i\u02BE": "\u0626"
+    "i\u02BC": "\u0627\u0626"  # NOTE(review): the U+02BE variant above maps to "\u0626" only - confirm the extra alif is intended
+
+    "%i": "\u0625"
+    "i": ""
+
+    # U
+
+    "u\u0304\u02BE": "\u0624"
+    "u\u0304\u02BC": "\u0624"
+    "%u\u0304w": "\u0623\u0648"
+    "%u\u0304": "\u0623\u0648"
+    "u\u0304w": "\u0648"
+    "u\u0304": "\u0648"
+    "u\u02BE": "\u0624"
+    "u\u02BC": "\u0624"
+
+    "%u": "\u0623"
+    "u": ""
+
+    # Consonants, with tashdid added
+
+    "bb": "\u0628"
+    "b": "\u0628"
+    "thth": "\u062B"
+    "th": "\u062B"
+    "t\u0323t\u0323": "\u0637"
+    "t\u0323": "\u0637"
+    "tt": "\u062A"
+    "t": "\u062A"
+    "J": "\u062C"
+    "jj": "\u062C"
+    "j": "\u062C"
+    "h\u0323h\u0323": "\u062D"
+    "h\u0323": "\u062D"
+    "hh": "\u0647"
+    "h": "\u0647"
+    "Kh": "\u062E"
+    "khkh": "\u062E"
+    "kh": "\u062E"
+    "kk": "\u0643"
+    "k": "\u0643"
+    "dhdh": "\u0630"
+    "dh": "\u0630"
+    "d\u0323d\u0323": "\u0636"
+    "d\u0323": "\u0636"
+    "dd": "\u062F"
+    "d": "\u062F"
+    "rr": "\u0631"
+    "r": "\u0631"
+    "z\u0323z\u0323": "\u0638"
+    "z\u0323": "\u0638"
+    "zz": "\u0632"
+    "z": "\u0632"
+    "shsh": "\u0634"
+    "sh": "\u0634"
+    "s\u0323s\u0323": "\u0635"
+    "s\u0323": "\u0635"
+    "ss": "\u0633"
+    "s": "\u0633"
+    "ghgh": "\u063A"
+    "gh": "\u063A"
+    "ff": "\u0641"
+    "f": "\u0641"
+    "qq": "\u0642"
+    "q": "\u0642"
+    "ll": "\u0644"
+    "l": "\u0644"
+    "mm": "\u0645"
+    "m": "\u0645"
+    "nn": "\u0646"
+    "n": "\u0646"
+    "ww": "\u0648"
+    "w": "\u0648"
+    "yy": "\u064A"
+    "y": "\u064A"
+
+    # non-Arabic consonants:
+    "p": "\u067E"
+    "ch": "\u0686"
+    "v": "\u06A4"
+    "g": "\u06AF"
+
+    # Diacritic characters:
+    # ain (\u0639) - not transliterated alone:
+    "\u02BB": "\u0639"
+    # hamza - not romanized
+    # "\u0621"
+    # hamza (alone in final position)
+    "\u02BE%": "\u0621"
+    "\u02BC%": "\u0621"
+
+    # Do not know what, if anything, is needed here:
+    # tatweel:
+    # "\u0640"
+    # fathatan:
+    # "\u064B"
+    # dammatan:
+    # "\u064C"
+    # kasratan:
+    # "\u064D"
+    # fatha:
+    # "\u064E"
+    # damma:
+    # "\u064F"
+    # kasra:
+    # "\u0650"
+    # shadda:
+    # "\u0651"
+    # sukun:
+    # "\u0652"
+    # superscript alef:
+    # "\u0670"
+    # alef wasla
+    # "\u0671"
+
+
+script_to_roman:
+  hooks:
+    post_config:  # NOTE(review): presumably where the 3rd-party S2R library named in the file header is wired in - confirm
+      -
+        - arabic.arabic_romanizer.s2r_post_config