一个项目在不断维护的过程中,总会有一些角落隐藏着无用代码;对于有些代码洁癖的人来说,能找到这部分代码并清理掉是最好的。

Django 对外提供的服务都可以在 urls.py 中找到痕迹,因此我们可以将其与服务端的访问请求日志进行对比,分析出系统中定义的哪些 url 实际上已无人使用。

分析的思路很简单,从urls.py中找出所有定义的url匹配规则,再读入访问日志,最终输出没有在日志中出现的匹配规则。

 # -*- coding:utf-8 -*-
 from django.core.management.base import BaseCommand
 from django.utils.importlib import import_module
 from django.core.urlresolvers import RegexURLPattern, RegexURLResolver
 import importlib
 import re
 import os
 import datetime
 import logging
 import json
 import urls

 class Command(BaseCommand):
     '''
     Report the URL patterns defined in the project that never show up in
     an access log, so that the views behind them can be reviewed for removal.

     Usage: python manage.py fetch_unused_urls log_path
     '''

     def handle(self, *args, **options):
         '''
         Print every URL regex built from ``urls.urlpatterns`` that matched
         no line of the access log.

         args[0] -- path of the access log file (one request URL per line).

         Raises CommandError when the log path argument is missing.
         '''
         # Local import: keeps this block self-contained with respect to the
         # file's import list.
         from django.core.management.base import CommandError

         if not args:
             raise CommandError(
                 'usage: python manage.py fetch_unused_urls log_path')

         patterns = self._get_urlpatterns([''], urls.urlpatterns)
         exprs = [self._build_pattern_regex(pattern) for pattern in patterns]
         # Single-argument print(...) behaves the same as the print statement.
         for url in self._find_unused_urls(exprs, args[0]):
             print(url)

     def _find_unused_urls(self, exprs, log_path):
         '''
         Return the entries of ``exprs`` that matched no request in the log.

         exprs    -- regex strings, in urlconf definition order.
         log_path -- text file read line by line; each line is treated as
                     one requested URL.

         Lines starting with ``/admin`` and blank lines are skipped. A line
         that matches none of the expressions is reported on stdout. The
         result keeps the order of ``exprs``.
         '''
         # A list (not a dict) keeps definition order, so when several
         # patterns could match a URL the earliest one is credited, which is
         # the order in which the patterns were collected from the urlconf.
         compiled = [(expr, re.compile(expr, re.UNICODE)) for expr in exprs]

         used_set = set()
         with open(log_path) as log_file:
             for line in log_file:
                 # Drop the line terminator (and stray whitespace) so that
                 # "$"-anchored patterns also work on "\r\n" logs.
                 url = line.strip()
                 if not url or url.startswith('/admin'):
                     continue
                 # URL patterns describe the path only; ignore a query string.
                 path = url.split('?', 1)[0]
                 for expr, re_expr in compiled:
                     if re_expr.search(path) is not None:
                         used_set.add(expr)
                         break
                 else:
                     print('url match not found for %s' % url)

         return [expr for expr in exprs if expr not in used_set]

     def _build_pattern_regex(self, pattern):
         '''
         Build a regex string that can be matched against a real request path.

         pattern -- a ``(prefixes, url_pattern)`` pair as produced by
                    ``_get_urlpatterns``: ``prefixes`` is the list of
                    enclosing resolver regexes (outermost first) and
                    ``url_pattern.regex.pattern`` is the pattern's own regex.

         The leading ``^`` / trailing ``$`` of every prefix and the leading
         ``^`` of the pattern are removed, the pieces are concatenated, and
         the result is anchored with ``^/``.
         '''
         def _strip_start(text, c='^'):
             return text[1:] if text.startswith(c) else text

         def _strip_end(text, c='$'):
             return text[:-1] if text.endswith(c) else text

         prefix_str = ''.join(
             _strip_end(_strip_start(prefix)) for prefix in pattern[0])
         if not prefix_str.endswith('/'):
             prefix_str += '/'
         # "^/" is prepended below, so a leading slash here would be doubled.
         prefix_str = _strip_start(prefix_str, '/')

         expr = _strip_start(pattern[1].regex.pattern)
         return '^/' + prefix_str + expr

     def _get_urlpatterns(self, prefix, urlpatterns):
         '''
         Flatten a urlconf into a list of ``(prefixes, RegexURLPattern)`` pairs.

         prefix      -- list of resolver regexes leading to ``urlpatterns``.
         urlpatterns -- iterable of RegexURLPattern / RegexURLResolver objects.

         Resolvers whose ``urlconf_name`` is a list or tuple of patterns are
         skipped; entries of any other type are ignored.
         '''
         result = []
         for pattern in urlpatterns:
             if isinstance(pattern, RegexURLPattern):
                 result.append((prefix, pattern))
             elif isinstance(pattern, RegexURLResolver):
                 urlconf = pattern.urlconf_name
                 if isinstance(urlconf, (list, tuple)):
                     continue
                 # NOTE(review): depending on the Django version, urlconf_name
                 # appears to be either a dotted module path or an already
                 # imported module; only the former needs importing.
                 if not hasattr(urlconf, 'urlpatterns'):
                     urlconf = importlib.import_module(urlconf)
                 new_prefix = prefix + [pattern.regex.pattern]
                 result.extend(
                     self._get_urlpatterns(new_prefix, urlconf.urlpatterns))
         return result