رُکُن:RishabhBot/source-code/ks-update-nouns.py

وِکیٖلۄغَتھ پؠٹھٕ

This script was used to update pages of Kashmiri common nouns with new templates.

import pywikibot
import re
# Connect to the Kashmiri ("ks") Wiktionary.
site = pywikibot.Site("ks", "wiktionary")
# Fetch every article in the common-nouns category and materialise the
# generator into a list.  For a trial run on a few pages, slice the list
# (e.g. ``list(cat)[:10]``).
cat = pywikibot.Category(site, "زٲژ:کٲشِرؠ عام ناوٕتؠ").articles()
lst = list(cat)
def cont_elem(t, lst):
    """Return True if at least one element of *lst* is contained in *t*.

    Containment is tested with the ``in`` operator, so for a string *t*
    this is a substring test.  An empty *lst* yields False.
    """
    return any(candidate in t for candidate in lst)
def contains_lang(t):
    """Return True if *t* contains any language name listed in ``code_dict``."""
    language_names = code_dict.values()
    return cont_elem(t, language_names)
# Maps a language code to the language name written in Kashmiri.
# The names are searched for in page text (to recognise language headings and
# translation lines); the codes are emitted as template parameter names and as
# the source-language argument of the etymology template.
code_dict = {'ks':'کٲشُر', 'en':'اَنٛگریٖزی' ,'it':'اِطٲلوی', 'ur':'اُردوٗ', 'bn':'بَنٛگٲلؠ', 'ps' :'پَشتوٗ', 
    'pa' :'پَنٛجٲبؠ', 'inc-pra' :'پرٛاکرِت', 'ang' :'پرٛون اَنٛگریٖزی', 'roa-opt' :'پرٛون پُرتَگٲلؠ',
    'goh':'پرٛون تھۆد جَرمَن', 'ojp':'پرٛون جاپٲنؠ', 'odt':'پرٛون ڈَچ', 
    'fro':'پرٛون فرانسیٖسی', 'peo':'پرٛون فارسی', 'osp':'پرٛون ہِسپٲنوی', 
    'grc':'پرٛون یوٗنٲنؠ', 'pt':'پُرتَگٲلؠ', 'ta':'تٲمِل', 'tr':'تُرکی', 'de':'جَرمَن', 
    'ja':'جاپٲنؠ', 'nl':'ڈَچ', 'ru':'روٗسی', 'sa':'سَنَسکرٕٛت', 'sd':'سِندی', 'ar': 'عَربی',
    'ota': 'عُثمٲنؠ تُرکی', 'fr': 'فرانسیٖسی', 'fa':'فارسی', 'kn' :'کَنَڑ', 'gu':'گُجرٲتؠ', 
    'la' :'لاطیٖنی', 'mr':'مَرٲٹھؠ', 'ml':'مَلیٲلؠ',  'enm':"مَنٛز اَنٛگریٖزی", 
    'gmh':'مَنٛز تھۆد جَرمَن', 'dum':'مَنٛز ڈَچ', 'frm':'مَنٛز فرانسیٖسی', 'pal':'مَنٛز فارسی',
    'es':'ہِسپٲنوی', 'hi':'ہِندی', 'el':'یوٗنٲنؠ',
}
# Pages whose processing raised an exception, as "title : message" strings.
err_lst = []
# Titles of pages whose rewritten text equals the current text.
not_to_change = []
# The edit summary is the same for every page, so it is built once.
summary = "Update with templates: headword, etymology, translations"


def _clean_translation(raw):
    """Drop wiki-link brackets and normalise comma separators in *raw*."""
    return (raw.replace('[[', '').replace(']]', '').replace('،', ',')
            .replace(', ', ',').replace(' ,', ','))


# Rewrite every page in ``lst``: replace the hand-written headword line,
# etymology line and translation list with templates, and drop the lines the
# templates make redundant.  All per-page state is recomputed on every
# iteration so nothing leaks from one page into the next.
for page in lst:
    try:
        if "فرما" in page.title():
            continue
        text = page.text
        print(page.title())

        bracketed = re.findall(r"\[(.*?)\]", text)
        # Bracket contents without ':' or '[' are treated as the IPA
        # transcription.  Empty matches are skipped: "" is a substring of
        # every line and would turn each line into a headword template.
        ipa = [i for i in bracketed if i and ':' not in i and '[' not in i]
        # Category links to strip: both substrings must be present.  (The
        # former ``'x' and 'y' in i`` only ever tested the second one.)
        cat_name = [i for i in bracketed if 'کٲشِرؠ' in i and "زٲژ" in i]

        after_trans = re.findall(r"تَرجَمہٕ====(.*)?\]", text, re.DOTALL)
        print("After trans: " + str(after_trans))
        # Parenthesised sense labels found after the translations heading.
        # Reset to an empty list when the section is not found exactly once,
        # so a previous page's labels are never reused.
        if len(after_trans) == 1:
            definitions = re.findall(r"\((.*?)\)", after_trans[0])
        else:
            definitions = []
        definition_markers = ['(' + i + ')' for i in definitions]

        # Gender is only trusted when its marker occurs exactly once.
        if text.count("''نَر''") == 1:
            g = "m"
        elif text.count("''مادٕ''") == 1:
            g = "f"
        else:
            g = None

        if ipa:
            # Omit the gender parameter when it could not be determined
            # instead of reusing the value left over from an earlier page.
            gender_part = "|g=" + g if g is not None else ""
            out_head = '{{ks-noun|ipa=' + ipa[0] + gender_part + "}}\n"
        else:
            out_head = None

        out_parts = []
        # NOTE: the order of the branches below matters; e.g. etymology lines
        # also contain a language name and must be handled before the generic
        # "contains a language name" branch drops them.
        for line in text.split('\n'):
            if out_head is not None and ipa[0] in line:
                # The line carrying the IPA becomes the headword template.
                out_parts.append(out_head + '\n')
            elif cont_elem(line, cat_name):
                # Category link line: dropped.
                pass
            elif contains_lang(line) and "==" in line:
                # Language heading: kept unchanged.
                out_parts.append(line + '\n')
            elif "پؠٹھٕ آمُت" in line:
                # Etymology line "<language> پؠٹھٕ آمُت <word> (<gloss>)".
                anc_w = re.findall(r"پؠٹھٕ آمُت(.*?)\(", line)[0].strip()
                anc_lang = re.findall(r"(.*?)\ پؠٹھٕ آمُت", line)[0].strip()
                tr = re.findall(r"\((.*?)\)", line)[0].strip()
                anc_codes = [k for k, v in code_dict.items() if v == anc_lang]
                if not anc_codes:
                    raise LookupError("unknown source language: " + anc_lang)
                out_parts.append(
                    "{{from|ks|" + anc_codes[0] + "|" + anc_w + "|" + tr + "}}\n"
                )
            elif "====تَرجَمہٕ====" in line and "{{trans" not in text:
                # Collect the "<language> : ...]]" matches once per page, in
                # code_dict order, keeping only languages that occur.
                translations = {}
                for k, v in code_dict.items():
                    found = re.findall(re.escape(v) + r" : (.*)?\]\]", text)
                    if found:
                        translations[k] = found

                out_parts.append("====تَرجَمہٕ====\n")
                if not definitions:
                    # No sense labels: a single template with the first
                    # match of each language.
                    out_parts.append("{{trans")
                    for k, found in translations.items():
                        out_parts.append(
                            '|' + k + '=' + _clean_translation(found[0]) + "|"
                        )
                    out_parts.append("}}\n")
                else:
                    # One template per sense; the n-th match of a language
                    # belongs to the n-th sense.  A language with fewer
                    # matches than senses raises IndexError, which skips the
                    # page and records it in err_lst.
                    for n, definition in enumerate(definitions):
                        out_parts.append("{{trans|def=" + definition)
                        for k, found in translations.items():
                            out_parts.append(
                                "|" + k + '=' + _clean_translation(found[n]) + "|"
                            )
                        out_parts.append("}}\n")
            elif contains_lang(line):
                # Remaining lines naming a language (the old translation
                # entries): dropped.
                pass
            elif cont_elem(line, definition_markers) and '*' not in line:
                # Old "(sense)" label lines: dropped.
                pass
            else:
                out_parts.append(line + '\n')

        out = ''.join(out_parts)
        print(out)
        if out.strip() == page.text.strip():
            not_to_change.append(page.title())
        else:
            page.text = out
            page.save(summary)
        print("BREAK")
    except Exception as e:
        # Per-page boundary: record the failure and carry on with the next
        # page rather than aborting the whole run.
        err_lst.append(page.title() + " : " + str(e))
print("Errors :" + str(err_lst))
print(str(not_to_change))