voicegen

Running

voicegen / text /sanskrit.py

Upload 66 files

c6070db almost 2 years ago

1.47 kB

	import re
	from indic_transliteration import sanscript


	# Ordered IAST -> IPA rewrite rules, stored as (compiled pattern, replacement).
	# The order is significant: some patterns (for example 'k⁼h') only match text
	# produced by an earlier rule ('k' -> 'k⁼'), so the rules are meant to be
	# applied one after another from first to last.
	_iast_to_ipa = [
	    (re.compile(iast), ipa)
	    for iast, ipa in (
	        # Vowels
	        ('a', 'ə'),
	        ('ā', 'aː'),
	        ('ī', 'iː'),
	        ('ū', 'uː'),
	        ('ṛ', 'ɹ`'),
	        ('ṝ', 'ɹ`ː'),
	        ('ḷ', 'l`'),
	        ('ḹ', 'l`ː'),
	        ('e', 'eː'),
	        ('o', 'oː'),
	        # Consonants: each plain stop is rewritten first, then the
	        # "<stop>h" sequence is collapsed into a single aspirated symbol.
	        ('k', 'k⁼'),
	        ('k⁼h', 'kʰ'),
	        ('g', 'g⁼'),
	        ('g⁼h', 'gʰ'),
	        ('ṅ', 'ŋ'),
	        ('c', 'ʧ⁼'),
	        ('ʧ⁼h', 'ʧʰ'),
	        ('j', 'ʥ⁼'),
	        ('ʥ⁼h', 'ʥʰ'),
	        ('ñ', 'n^'),
	        ('ṭ', 't`⁼'),
	        ('t`⁼h', 't`ʰ'),
	        ('ḍ', 'd`⁼'),
	        ('d`⁼h', 'd`ʰ'),
	        ('ṇ', 'n`'),
	        ('t', 't⁼'),
	        ('t⁼h', 'tʰ'),
	        ('d', 'd⁼'),
	        ('d⁼h', 'dʰ'),
	        ('p', 'p⁼'),
	        ('p⁼h', 'pʰ'),
	        ('b', 'b⁼'),
	        ('b⁼h', 'bʰ'),
	        ('y', 'j'),
	        ('ś', 'ʃ'),
	        ('ṣ', 's`'),
	        ('r', 'ɾ'),
	        ('l̤', 'l`'),
	        # Any 'h' still present at this point is a standalone consonant.
	        ('h', 'ɦ'),
	        # Remaining marks: the apostrophe is dropped, '~' and 'ṃ' become '^'.
	        ("'", ''),
	        ('~', '^'),
	        ('ṃ', '^'),
	    )
	]


	def devanagari_to_ipa(text):
	    """Convert Devanagari text into an IPA-like phoneme string.

	    Steps, in order:
	      1. The Om sign is replaced by its spelled-out letters.
	      2. Danda punctuation is mapped to ASCII punctuation: a single danda
	         at the end of the text becomes '.', any other single danda becomes
	         ', ', and a double danda becomes '.'.
	      3. The text is transliterated from Devanagari to IAST with
	         ``sanscript.transliterate``.
	      4. Every rule in ``_iast_to_ipa`` is applied in list order.
	      5. Each visarga ('ḥ') is rewritten as 'h' followed by a copy of the
	         character before it and a '*' marker.

	    Args:
	        text: Input string containing Devanagari text.

	    Returns:
	        The converted string.
	    """
	    # Spell out the Om sign so it goes through the ordinary letter rules.
	    text = text.replace('ॐ', 'ओम्')

	    # Whitespace around a danda is optional (`\s*`, matching the double-danda
	    # rule below). Requiring a whitespace character on both sides would leave
	    # a danda that closes the text, or one written right after a word,
	    # unconverted.
	    text = re.sub(r'\s*।\s*$', '.', text)   # closing danda -> full stop
	    text = re.sub(r'\s*।\s*', ', ', text)   # any other danda -> pause
	    text = re.sub(r'\s*॥', '.', text)       # double danda -> full stop

	    text = sanscript.transliterate(text, sanscript.DEVANAGARI, sanscript.IAST)

	    # The rules are order-dependent, so apply them strictly in sequence.
	    for regex, replacement in _iast_to_ipa:
	        text = re.sub(regex, replacement, text)

	    # Visarga: group(1) is the character before any '`'/'ː' marks. Keep the
	    # whole match minus the 'ḥ', then append 'h', that character and '*'.
	    text = re.sub(
	        '(.)[`ː]*ḥ',
	        lambda x: x.group(0)[:-1] + 'h' + x.group(1) + '*',
	        text,
	    )
	    return text