# رُکُن:RishabhBot/source-code/ks-update-nouns.py
# This script was used to update pages of Kashmiri common nouns with new templates.
import pywikibot
import re
# Wiki the bot reads from and writes to ("ks" language code, Wiktionary family).
site = pywikibot.Site("ks", "wiktionary")
# Kept for trial runs on a ten-page sample of a narrower category:
#lst = [i for i in pywikibot.Category(site, "زٲژ:کٲشِرؠ مادٕ عام ناوٕتؠ").articles()][:10]
# Every article in the target category, materialised so it can be looped over below.
cat = pywikibot.Category(site, "زٲژ:کٲشِرؠ عام ناوٕتؠ").articles()
lst = list(cat)
def cont_elem(t, lst):
    """Return True if at least one element of ``lst`` occurs in ``t``.

    Membership is tested with ``in``, so when ``t`` is a string every element
    of ``lst`` is treated as a substring to look for.

    Args:
        t: Container (typically a line of wikitext) to search in.
        lst: Iterable of candidate elements.

    Returns:
        True as soon as one candidate is found in ``t``; False when none is
        found or ``lst`` is empty.
    """
    return any(item in t for item in lst)
def contains_lang(t):
    """Return True if ``t`` contains any language name listed in ``code_dict``.

    The check is a plain substring test against every value of ``code_dict``.
    """
    return cont_elem(t, code_dict.values())
# Language code -> language name as it is written in the page text.
# Insertion order matters: it is the order in which translation parameters
# are emitted by the code that iterates over this mapping.
code_dict = {
    'ks': 'کٲشُر',
    'en': 'اَنٛگریٖزی',
    'it': 'اِطٲلوی',
    'ur': 'اُردوٗ',
    'bn': 'بَنٛگٲلؠ',
    'ps': 'پَشتوٗ',
    'pa': 'پَنٛجٲبؠ',
    'inc-pra': 'پرٛاکرِت',
    'ang': 'پرٛون اَنٛگریٖزی',
    'roa-opt': 'پرٛون پُرتَگٲلؠ',
    'goh': 'پرٛون تھۆد جَرمَن',
    'ojp': 'پرٛون جاپٲنؠ',
    'odt': 'پرٛون ڈَچ',
    'fro': 'پرٛون فرانسیٖسی',
    'peo': 'پرٛون فارسی',
    'osp': 'پرٛون ہِسپٲنوی',
    'grc': 'پرٛون یوٗنٲنؠ',
    'pt': 'پُرتَگٲلؠ',
    'ta': 'تٲمِل',
    'tr': 'تُرکی',
    'de': 'جَرمَن',
    'ja': 'جاپٲنؠ',
    'nl': 'ڈَچ',
    'ru': 'روٗسی',
    'sa': 'سَنَسکرٕٛت',
    'sd': 'سِندی',
    'ar': 'عَربی',
    'ota': 'عُثمٲنؠ تُرکی',
    'fr': 'فرانسیٖسی',
    'fa': 'فارسی',
    'kn': 'کَنَڑ',
    'gu': 'گُجرٲتؠ',
    'la': 'لاطیٖنی',
    'mr': 'مَرٲٹھؠ',
    'ml': 'مَلیٲلؠ',
    'enm': 'مَنٛز اَنٛگریٖزی',
    'gmh': 'مَنٛز تھۆد جَرمَن',
    'dum': 'مَنٛز ڈَچ',
    'frm': 'مَنٛز فرانسیٖسی',
    'pal': 'مَنٛز فارسی',
    'es': 'ہِسپٲنوی',
    'hi': 'ہِندی',
    'el': 'یوٗنٲنؠ',
}
# Edit summary attached to every page save.
summary = "Update with templates: headword, etymology, translations"

# Literal markers looked for in the page text.
_TRANS_HEADING = "====تَرجَمہٕ===="
_DERIVED_FROM = "پؠٹھٕ آمُت"

# One pre-compiled pattern per language: "<language name> : <translations>]]".
# NOTE(review): a language name that is the tail of another one (e.g. the
# 'en' name inside the 'ang' / 'enm' names) also matches on those lines;
# this mirrors the original matching and has not been tightened here.
_TRANSLATION_PATTERNS = {
    code: re.compile(re.escape(name) + r" : (.*)?\]\]")
    for code, name in code_dict.items()
}
# Reverse lookup (language name -> code) used when rewriting etymology lines.
_CODE_BY_NAME = {name: code for code, name in code_dict.items()}


def _clean_translation(raw):
    """Drop wiki-link brackets and normalise comma separators in ``raw``."""
    return (
        raw.replace('[[', '')
        .replace(']]', '')
        .replace('،', ',')
        .replace(', ', ',')
        .replace(' ,', ',')
    )


def _translation_params(found, index):
    """Build the ``|code=value|`` parameters of one ``{{trans}}`` call.

    Args:
        found: Mapping of language code to every translation list captured
            for that language, in page order.
        index: Which capture to use (the position of the current definition).

    Returns:
        The concatenated parameters; languages without any capture are
        omitted.

    Raises:
        ValueError: if a language has captures but fewer than ``index + 1``,
            i.e. its translations cannot be lined up with the definitions.
            The caller skips the page in that case rather than guessing.
    """
    params = []
    for code, values in found.items():
        if not values:
            continue
        if index >= len(values):
            raise ValueError(
                "translations for '" + code + "' do not line up with the definitions"
            )
        params.append('|' + code + '=' + _clean_translation(values[index]) + '|')
    return ''.join(params)


def _translation_section(text, definitions):
    """Return the translation heading followed by its ``{{trans}}`` calls.

    Without definitions a single ``{{trans}}`` is produced from the first
    capture of every language; otherwise one ``{{trans|def=...}}`` is
    produced per definition, using the capture at the same position.
    """
    # Scan the page once per language instead of once per definition.
    found = {
        code: pattern.findall(text)
        for code, pattern in _TRANSLATION_PATTERNS.items()
    }
    section = [_TRANS_HEADING + "\n"]
    if not definitions:
        section.append("{{trans" + _translation_params(found, 0) + "}}\n")
    else:
        for index, definition in enumerate(definitions):
            section.append(
                "{{trans|def=" + definition
                + _translation_params(found, index) + "}}\n"
            )
    return ''.join(section)


def _etymology_template(line):
    """Convert a "<language> پؠٹھٕ آمُت <word> (<text>)" line to ``{{from}}``.

    Raises:
        IndexError: if the line lacks the expected word / parenthesis parts.
        KeyError: if the language name is not a value of ``code_dict``.
    """
    anc_w = re.findall(_DERIVED_FROM + r"(.*?)\(", line)[0].strip()
    anc_lang = re.findall(r"(.*?)\ " + _DERIVED_FROM, line)[0].strip()
    tr = re.findall(r"\((.*?)\)", line)[0].strip()
    anc_code = _CODE_BY_NAME[anc_lang]
    return "{{from|ks|" + anc_code + "|" + anc_w + "|" + tr + "}}\n"


def _rewrite_page_text(text):
    """Return ``text`` with headword, etymology and translations templated.

    All per-page state is local to this call, so nothing detected on one page
    can leak into the next.

    Raises:
        ValueError: if an IPA transcription was found but the gender marker
            does not occur exactly once, or if translations cannot be lined
            up with the definitions.
        IndexError, KeyError: if an etymology line cannot be parsed.
    """
    bracketed = re.findall(r"\[(.*?)\]", text)
    # Single-bracket content that is neither a link nor a namespaced title;
    # empty captures are skipped because '' would match every line.
    ipa = [i for i in bracketed if i and ':' not in i and '[' not in i]
    # Both substrings must be present (the original tested only the second).
    cat_name = [i for i in bracketed if 'کٲشِرؠ' in i and "زٲژ" in i]

    after_trans = re.findall(r"تَرجَمہٕ====(.*)?\]", text, re.DOTALL)
    print("After trans: " + str(after_trans))
    if len(after_trans) == 1:
        definitions = re.findall(r"\((.*?)\)", after_trans[0])
    else:
        definitions = []
    definition_labels = ['(' + d + ')' for d in definitions]

    if text.count("''نَر''") == 1:
        g = "m"
    elif text.count("''مادٕ''") == 1:
        g = "f"
    else:
        g = None

    out_head = None
    if ipa:
        if g is None:
            # Never write a guessed gender: skip the page and report it.
            raise ValueError("gender marker not found exactly once")
        out_head = '{{ks-noun|ipa=' + ipa[0] + "|g=" + g + "}}\n"

    parts = []
    for line in text.split('\n'):
        if out_head is not None and ipa[0] in line:
            # Headword template followed by a blank line.
            parts.append(out_head + '\n')
        elif cont_elem(line, cat_name):
            continue  # matched category link: dropped from the output
        elif contains_lang(line) and "==" in line:
            parts.append(line + '\n')  # language heading: kept as is
        elif _DERIVED_FROM in line:
            parts.append(_etymology_template(line))
        elif _TRANS_HEADING in line and "{{trans" not in text:
            parts.append(_translation_section(text, definitions))
        elif contains_lang(line):
            continue  # any other line naming a language: dropped
        elif cont_elem(line, definition_labels) and '*' not in line:
            continue  # bare "(definition)" line: dropped
        else:
            parts.append(line + '\n')
    return ''.join(parts)


err_lst = []
not_to_change = []
for page in lst:
    try:
        title = page.title()
        if "فرما" in title:
            continue
        print(title)
        out = _rewrite_page_text(page.text)
        print(out)
        if out.strip() == page.text.strip():
            not_to_change.append(title)
        else:
            page.text = out
            page.save(summary)
        print("BREAK")
    except Exception as e:
        # Best-effort bot run: record the failure and move on to the next page.
        err_lst.append(page.title() + " : " + str(e))
print("Errors :" + str(err_lst))
print(str(not_to_change))