View Single Post
Old 04-13-2025, 08:40 AM   #80
lomkiri
Groupie
lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.lomkiri ought to be getting tired of karma fortunes by now.
 
lomkiri's Avatar
 
Posts: 167
Karma: 1497966
Join Date: Jul 2021
Device: N/A
Display the number of occurrences of each html tag in all text files

New version with an option (in the parameters) for printing the list of the impacted files below each tag.
It tries to mimic the tool "Reports", but of course the list is not clickable.

N.B.: Each file list starts with the currently edited file, so if you ask for the file lists, it's a good idea to display the first file before running "Replace all".

Code:
def replace(match, number, file_name, metadata, dictionaries, data, functions, *args, **kwargs):
    r""" 2025-04-13
    Counts the number of occurrences for every html tag in an epub.
    May be filtered by tag name and by max number of occurrences.
    Option for listing the impacted files.

    search regex: <(\w+)

    Normal passage (match is not None): the tag name captured in group 1 is
    counted in `data`, globally and per file, and the matched text is returned
    as is.

    Last passage (match is None): prints the report built from `data`,
    according to the parameters defined in the body, and returns None.

    Parameters used here:
        match      -- match object (group 1 = tag name), or None on the last passage
        number     -- printed as the total number of occurrences on the last passage
        file_name  -- name of the file in which the match was found
        data       -- dict expected to persist between calls, filled as
                      {tag: {'numtags': int, 'files': {file_name: int}}}
    The other parameters are accepted but not used.
    """

    def plural(word, n):
        # English uses the plural for every count except exactly 1 (so also for 0).
        return word + ('' if n == 1 else 's')

    # normal passage: count the tag and leave the text untouched
    if match is not None:
        tag = match[1]
        entry = data.setdefault(tag, {'numtags': 0, 'files': {}})
        entry['numtags'] += 1
        entry['files'][file_name] = entry['files'].get(file_name, 0) + 1
        return match[0]

    # last passage: print the report

    #### Parameters ###
    # No filter at all if excl = incl = [], and if max_it = None or 0
    #
    # Include only some tags (if defined, deactivates any exclusion). E.g.:
    #   incl = ['img', 'svg']  # [] for no inclusion
    # Exclusion of some tags, e.g.:
    #   excl = ['html', 'meta', 'body', 'title', 'div', 'p']    # [] for no exclusion
    # No display if more occurrences than max_it, e.g.:
    #   max_it = 5        # None or 0 for no limit
    # Sorting:
    #   sort = 'name'     # 'name' | 'number' | None or '' (any other value will sort by name)
    #   reverse = False   # Reverse sorting if True
    # Optional file list:
    #   showfiles = True  # For each tag, show the affected files with the number of occ.
    #                     # This list starts with the file currently displayed

    incl = []           # [] for no filter, ['div'] for only one tag
    excl = []           # [] for no filter
    max_it = 0          # 0 or None for no filter
    sort = 'name'       # None or '' for no sorting
    reverse = False
    showfiles = False   # False for no file list
    #####

    by_number = bool(sort) and sort.lower() == 'number'

    # Prepare the print of the parameters, if any:
    if not sort:
        sorting = 'List ordered by natural order'
    elif by_number:
        sorting = 'List ordered by number of occurrences'
    else:
        sorting = 'List ordered by name'
    if sort and reverse:
        # 'reverse' has no effect on the natural (unsorted) order, so it is only announced with a sort
        sorting += ' (reversed order)'

    print_param = []
    if incl:
        print_param.append('Include only those tags: ' + ', '.join(incl))
    if excl:
        print_param.append('Exclude those tags: ' + ', '.join(excl))
    if max_it:
        print_param.append(f"Don't print tags with more than {max_it} {plural('occurrence', max_it)}")
    if showfiles:
        print_param.append('Print also the list of the impacted files (starts at the displayed file)')

    # Select the tags to report; inclusions take precedence over exclusions
    def selected(name):
        return name in incl if incl else name not in excl

    my_tags = {k: d for k, d in data.items()
               if selected(k) and (not max_it or d['numtags'] <= max_it)}

    # print headers
    print(f'Found a total of {number} {plural("occurrence", number)} and {len(data)} different {plural("tag", len(data))}')
    if print_param:
        print(6*' ' + '\n      '.join(print_param))
    if incl and excl:
        print('You have defined inclusions AND exclusions. Only inclusions have been treated')
    if not my_tags:
        print('No occurrences found with those criteria')
    elif len(my_tags) < len(data):
        # Some tags were filtered out: also give the totals of the selection
        ntags = sum(d['numtags'] for d in my_tags.values())
        print(f'Selected a total of {ntags} {plural("occurrence", ntags)} and {len(my_tags)} different {plural("tag", len(my_tags))}')
    print(sorting)
    print('')

    # Order of display
    if not sort:
        ind = list(my_tags)     # natural order = order of first encounter
    elif by_number:
        ind = sorted(my_tags, key=lambda k: my_tags[k]['numtags'], reverse=reverse)
    else:
        ind = sorted(my_tags, reverse=reverse)

    # Print the occurrences by tag
    for key in ind:
        print(f'{key} : {my_tags[key]["numtags"]}')
        if showfiles:
            for f, nb in my_tags[key]['files'].items():
                print(f'{6*" "} {f} : {nb}')
    return

# Flag read by the host application (presumably calibre's function-mode search & replace — confirm):
# it asks for one extra call after the last match, which replace() handles in its "match is None" branch.
replace.call_after_last_match = True    # Ask for last passage

Last edited by lomkiri; 04-14-2025 at 11:14 AM.
lomkiri is offline   Reply With Quote