-
+ 5DE963EB326E8F107264FB5D2DCEAF715B8DAFF649353295FF19BFAA560946BD856F8970C69B1B6360FB003B7548FA78302423ECF83512A4BFF43CFD3973F628
logotron/reader.py
(0 . 0)(1 . 441)
761 #!/usr/bin/python
762
763 ##############################################################################
764 import ConfigParser, sys
765 import psycopg2, psycopg2.extras
766 import psycopg2.extensions
767 psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
768 psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY)
769 import time
770 import datetime
771 from datetime import timedelta
772 import sys
773 reload(sys)
774 sys.setdefaultencoding('utf8')
775 import os
776 import threading
777 import re
778 from datetime import datetime
779 from urlparse import urljoin
780 from flask import Flask, request, session, url_for, redirect, \
781 render_template, abort, g, flash, _app_ctx_stack, make_response, \
782 jsonify
783 from flask import Flask
784 from flask.ext.cache import Cache
785 ##############################################################################
786
787 ##############################################################################
# Single mandatory arg: config file path
if len(sys.argv[1:]) != 1:
    # Wrong number of args: print usage and exit.
    print(sys.argv[0] + " CONFIG")
    sys.exit(0)

# Read Config from given conf file
config_path = os.path.abspath(sys.argv[1])
cfg = ConfigParser.ConfigParser()
# Use a context manager so the config file handle is closed once parsed.
with open(config_path) as config_file:
    cfg.readfp(config_file)

try:
    # IRCism:
    Nick = cfg.get("irc", "nick")
    Channels = [x.strip() for x in cfg.get("irc", "chans").split(',')]
    Bots = [x.strip() for x in cfg.get("logotron", "bots").split(',')]
    Bots.append(Nick)  # Add our own bot to the bot list
    # DBism:
    DB_Name = cfg.get("db", "db_name")
    DB_User = cfg.get("db", "db_user")
    # cfg.get() returns a string, and every non-empty string is truthy, so
    # "0" or "false" used to switch debugging ON.  Parse it as a boolean;
    # anything that is not an explicit "on" value (including empty) is off.
    DB_DEBUG = cfg.get("db", "db_debug").strip().lower() in (
        "1", "true", "yes", "on")
    # Logism:
    Base_URL = cfg.get("logotron", "base_url")
    Era = int(cfg.get("logotron", "era"))
    # WWW:
    WWW_Port = int(cfg.get("logotron", "www_port"))

except Exception as e:
    # Missing section/option or a non-integer era/port: report and bail out.
    print("Invalid config:  %s" % (e,))
    sys.exit(1)
818
819 ##############################################################################
820
821 ##############################################################################
### Knobs not made into config yet ###
# Channel used by the routes that do not name one: first channel in the config
Default_Chan = Channels[0]
# Shortest search query (in characters) that logsearch() will run
Min_Query_Length = 3
# Row limit passed to the search queries in logsearch()
Max_Search_Results = 1000

## Format for Date in Log Lines
# Also the format of the date component in /log/<chan>/<date> URLs
Date_Short_Format = "%Y-%m-%d"

## WWW Debug Knob
# When true, get_base() returns the requesting host's URL instead of Base_URL
DEBUG = False
832 ##############################################################################
833
# The Flask application object that all routes below are registered on
app = Flask(__name__)
# Cache bound to the app; not referenced elsewhere in the visible code
cache = Cache(app,config={'CACHE_TYPE': 'simple'})
# Load settings from this module's own globals (e.g. DEBUG) into app.config
app.config.from_object(__name__)
837
def get_db():
    """Return the DB connection stored on `g`, opening one on first use."""
    try:
        conn = g.db
    except AttributeError:
        conn = None

    if conn is None:
        # Nothing attached yet: connect and remember the connection on g.
        conn = psycopg2.connect("dbname=%s user=%s" % (DB_Name, DB_User))
        g.db = conn
    return conn
843
def close_db():
    """Close the DB connection stored on `g`, if there is one."""
    if not hasattr(g, 'db'):
        return
    g.db.close()
847
@app.before_request
def before_request():
    """Make sure a DB connection is attached to `g` before each request."""
    connection = get_db()
    g.db = connection
851
@app.teardown_request
def teardown_request(exception):
    """Close the request's DB connection when the request is torn down.

    `exception` is supplied by Flask and is not used here.
    """
    if hasattr(g, 'db'):
        g.db.close()
855
def query_db(query, args=(), one=False):
    """Run a query and return its rows as dict-like records.

    query -- SQL text with %s placeholders
    args  -- values bound to the placeholders
    one   -- if true return only the first row (None when there is none),
             otherwise return the list of all rows
    """
    cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        if DB_DEBUG:
            print("query: '{0}'".format(query))
        cur.execute(query, args)
        rv = cur.fetchone() if one else cur.fetchall()
        if DB_DEBUG:
            print("query res: '{0}'".format(rv))
        return rv
    finally:
        # Rows are already fetched; release the cursor even if execute raised.
        cur.close()
863
def exec_db(query, args=()):
    """Execute a statement without fetching results.

    query -- SQL text with %s placeholders
    args  -- values bound to the placeholders

    Nothing is committed here; call commit_db() to commit.
    """
    cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        if DB_DEBUG:
            print("query: '{0}'".format(query))
            print("args: '{0}'".format(args))
            print("EXEC:")
        cur.execute(query, args)
    finally:
        # Release the cursor whether or not the statement succeeded.
        cur.close()
870
def getlast_db():
    """Return the result of PostgreSQL's lastval() on the current connection."""
    cur = get_db().cursor(cursor_factory=psycopg2.extras.RealDictCursor)
    try:
        cur.execute('select lastval()')
        return cur.fetchone()['lastval']
    finally:
        # The value is already read; do not leave the cursor open.
        cur.close()
875
def commit_db():
    """Commit the current transaction on the request's DB connection."""
    # Committing is a connection-level operation; no cursor is needed.
    get_db().commit()
879
880 ##############################################################################
881
882 ## All eggogs redirect to main page
@app.errorhandler(404)
def page_not_found(error):
    """Handle a 404 by redirecting to the default log view."""
    main_page = url_for('log')
    return redirect(main_page)
886
887 ##############################################################################
888
# Characters that are special in HTML text or attribute values, mapped to
# the entity that represents them literally.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&#39;",
    ">": "&gt;",
    "<": "&lt;",
}


def html_escape(text):
    """Return `text` with &, ", ', > and < replaced by HTML entities.

    Each character is looked up on its own, so an '&' already present in
    the input is escaped exactly once and never re-escaped.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
899
900 ##############################################################################
901
902 ## Get base URL
def get_base():
    """Return the base URL for generated links.

    With DEBUG on this is the host URL of the current request; otherwise
    it is the configured Base_URL.
    """
    return request.host_url if DEBUG else Base_URL
907
908
909 # Get perma-URL corresponding to given log line
def line_url(l):
    """Return the permanent URL of log line `l`.

    `l` is a mapping with keys 'chan', 't' (an object with strftime) and
    'idx'; the URL has the form <base>log/<chan>/<date>#<idx>.
    """
    base = get_base()
    chan = l['chan']
    day = l['t'].strftime(Date_Short_Format)
    anchor = l['idx']
    return "{0}log/{1}/{2}#{3}".format(base, chan, day, anchor)
915
def _format_age(span):
    """Render a timedelta as e.g. '2d 3h 15m', leaving out zero units."""
    days = span.days
    hours = span.seconds // 3600
    minutes = (span.seconds % 3600) // 60

    txt = ""
    if days != 0:
        txt += '%dd ' % days
    if hours != 0:
        txt += '%dh ' % hours
    if minutes != 0:
        txt += '%dm' % minutes

    # Under a minute every unit is zero; an empty label would make the
    # surrounding link invisible, so show '0m' instead.
    return txt or '0m'


def gen_chanlist(selected_chan):
    """Build the HTML table listing all configured channels.

    The first row links to each channel's log, with `selected_chan`
    highlighted.  The second row shows, per channel, how long ago its most
    recent line was logged, linked to that line; the cell is empty for a
    channel with no lines.

    Returns the table as an HTML string.
    """
    # Get current time
    now = datetime.now()
    base = get_base()

    s = """<table align="center" class="chantable"><tr>"""
    for chan in Channels:
        chan_formed = chan
        if chan == selected_chan:
            chan_formed = "<span class='highlight'>" + chan + "</span>"
        s += """<th><a href="{0}log/{1}">{2}</a></th>""".format(
            base, chan, chan_formed)
    s += "</tr><tr>"

    for chan in Channels:
        # Most recent line of this channel (highest idx), if any
        last_line = query_db(
            '''select t, idx from loglines where chan=%s
               and idx = (select max(idx) from loglines where chan=%s) ;''',
            [chan, chan], one=True)

        if last_line is None:
            s += "<td></td>"
            continue

        s += """<td><i><a href="{0}log/{1}/{2}#{3}">{4}</a></i></td>""".format(
            base,
            chan,
            last_line['t'].strftime(Date_Short_Format),
            last_line['idx'],
            _format_age(now - last_line['t']))

    s += "</tr></table>"
    return s


# Make above callable from inside htm templater:
app.jinja_env.globals.update(gen_chanlist=gen_chanlist)
967
968
969 # HTML Tag Regex
tag_regex = re.compile("(<[^>]+>)")


# Find the segments of a block of text which constitute HTML tags
def get_link_intervals(str):
    """Return the (start, end) spans of every HTML tag found in `str`.

    Spans are in order of appearance; the list is empty when there is no
    tag.
    """
    return [found.span() for found in tag_regex.finditer(str)]
981
982
983 # Highlight all matched tokens in given text
def highlight_matches(strings, text):
    """Wrap every case-insensitive match of `strings` in a highlight span.

    strings -- regular-expression fragments, joined into one alternation;
               literal text must already be regex-escaped by the caller
    text    -- the text to search

    Returns `text` with each match wrapped in
    <span class='highlight'>...</span>.  With nothing to search for, `text`
    is returned unchanged.
    """
    # An empty alternative (or an empty list, giving the pattern '()')
    # matches the empty string at every position and would litter the
    # output with empty spans, so drop empties first.
    patterns = [s for s in strings if s]
    if not patterns:
        return text

    e = '(' + '|'.join(patterns) + ')'
    return re.sub(e,
                  r"""<span class='highlight'>\1</span>""",
                  text,
                  flags=re.I)
990
991
992 # Highlight matched tokens in the display of a search result logline,
993 # but leave HTML tags alone
def highlight_text(strings, text):
    """Highlight matches of `strings` in `text`, leaving HTML tags untouched.

    The text is cut at the tag spans reported by get_link_intervals(); the
    stretches between tags go through highlight_matches() and the tags
    themselves are copied verbatim.
    """
    pieces = []
    cursor = 0
    for tag_start, tag_end in get_link_intervals(text):
        # text before this tag gets highlighted
        pieces.append(highlight_matches(strings, text[cursor:tag_start]))
        # the HTML tag, leave it alone
        pieces.append(text[tag_start:tag_end])
        cursor = tag_end
    # whatever follows the final tag (or the whole text if there were none)
    pieces.append(highlight_matches(strings, text[cursor:]))
    return "".join(pieces)
1004
1005
1006 # Regexps used in format_logline:
boxlinks_re = re.compile('\[\s*<a href="(http[^ \[\]]+)">[^ <]+</a>\s*\]\[([^\[\]]+)\]')
stdlinks_re = re.compile('(http[^ \[\]]+)')


## Format given log line for display
def format_logline(l, highlights = []):
    """Return the HTML <div> for log line `l`.

    l          -- mapping with keys 'payload', 'speaker', 't', 'idx'
                  (and whatever line_url() reads)
    highlights -- search tokens to highlight in the payload; with the
                  default empty list nothing is highlighted
    """
    text = html_escape(l['payload'])

    # Turn bare URLs into links first ...
    text = stdlinks_re.sub(r'<a href="\1">\1</a>', text)

    # ... then collapse [link][text] pairs into a single titled link
    text = boxlinks_re.sub(r'<a href="\1">\2</a>', text)

    # Search results get their matched tokens illuminated
    if highlights != []:
        text = highlight_text(highlights, text)

    # Lines spoken by a known bot carry an extra CSS class
    bot_class = " bot" if l['speaker'] in Bots else ""

    template = ("<div id='{0}' class='{1}{5}'>"
                "<a class='nick' title='{2}'"
                " href=\"{3}\">{1}</a>: {4}</div>")
    return template.format(l['idx'],
                           l['speaker'],
                           l['t'],
                           line_url(l),
                           text,
                           bot_class)


# Make above callable from inside htm templater:
app.jinja_env.globals.update(format_logline=format_logline)
1043
1044
1045 # Generate navbar for the given date:
def generate_navbar(date, tail, chan):
    """Return the HTML previous/next-day navigation links for a log page.

    date -- the day being shown, as a string in Date_Short_Format
    tail -- true when the page is the live 'current' view; the next-day
            link is then left out
    chan -- channel whose log is being shown

    Raises ValueError if `date` does not parse with Date_Short_Format.
    """
    cur_day = datetime.strptime(date, Date_Short_Format)
    prev_day = cur_day - timedelta(days=1)
    prev_day_txt = prev_day.strftime(Date_Short_Format)

    # Arrows are written as HTML entities: this file declares no source
    # encoding, so non-ASCII literals are not allowed in it (PEP 263).
    s = "<a href='{0}log/{1}/{2}'>&larr; {2}</a>".format(
        get_base(),
        chan,
        prev_day_txt)

    if not tail:
        next_day = cur_day + timedelta(days=1)
        next_day_txt = next_day.strftime(Date_Short_Format)
        s = s + " | <a href='{0}log/{1}/{2}'>{2} &rarr;</a>".format(
            get_base(),
            chan,
            next_day_txt)

    return s


# Make above callable from inside htm templater:
app.jinja_env.globals.update(generate_navbar=generate_navbar)
1068
1069
@app.route('/log/<chan>/<date>')
@app.route('/log/<chan>', defaults={'date': None})
@app.route('/log/', defaults={'chan': Default_Chan, 'date': None})
@app.route('/log', defaults={'chan': Default_Chan, 'date': None})
def log(chan, date):
    """Render one day of a channel's log.

    chan -- channel name; an unknown channel redirects to the default log
    date -- day to show in Date_Short_Format, or None for today's 'tail'
            view; an unparseable date redirects to the default log
    """
    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    # No date given means we are viewing the 'current' tail
    tail = date is None
    if tail:
        date = datetime.now().strftime(Date_Short_Format)

    # Parse given date, and redirect to default log if rubbish:
    try:
        day_start = datetime.strptime(date, Date_Short_Format)
    except ValueError:
        return redirect(url_for('log'))

    # Determine the end of the interval being shown
    day_end = day_start + timedelta(days=1)

    # Get the loglines from DB.  The interval is half-open [start, end):
    # BETWEEN includes both ends, which would show a line stamped exactly
    # at midnight on two consecutive day pages.
    lines = query_db(
        '''select * from loglines where chan=%s
           and t >= %s and t < %s order by idx asc;''',
        [chan, day_start, day_end], one=False)

    # Return the HTMLized text
    return render_template('log.html',
                           chan = chan,
                           loglines = lines,
                           date = date,
                           tail = tail)
1111
1112
1113
Name_Chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"


def sanitize_speaker(s):
    """Return `s` with every character not listed in Name_Chars removed."""
    kept = []
    for ch in s:
        if ch in Name_Chars:
            kept.append(ch)
    return "".join(kept)
1118
1119
def re_escape(s):
    """Return `s` with a backslash placed before each of (){}[].*?|^$\\+-"""
    def _backslashed(found):
        # prefix the matched character with one backslash
        return "\\" + found.group(0)

    return re.sub(r"[(){}\[\].*?|^$\\+-]", _backslashed, s)
1122
1123 # Search knob. Supports 'chan' parameter.
@app.route('/log-search')
def logsearch():
    """Search a channel's log and render the results.

    Query parameters:
      chan -- channel to search (defaults to Default_Chan); an unknown
              channel redirects to the default log
      q    -- whitespace-separated search tokens.  A token of the form
              'from:NICK' or 'f:NICK' restricts results to speakers
              matching NICK; every other token must occur in the line's
              payload (case-insensitive).

    At most Max_Search_Results lines are returned, newest first.
    """
    # The query params:
    chan = request.args.get('chan', default = Default_Chan, type = str)
    query = request.args.get('q', default = '', type = str)

    # Handle rubbish chan:
    if chan not in Channels:
        return redirect(url_for('log'))

    nres = 0
    searchres = []
    tokens_orig = []
    search_head = "Query is too short!"

    # Forbid query that is too short.  Length is measured without the
    # surrounding whitespace: otherwise a blank or padded query passes the
    # check and, with no real tokens, matches every line.
    if len(query.strip()) >= Min_Query_Length:
        # Get the search tokens to use:
        tokens = query.split()
        tokens_standard = []
        from_users = []

        # separate out "from:foo" tokens and ordinary:
        for t in tokens:
            if t.startswith("from:") or t.startswith("f:"):
                # Record user for 'from' query
                from_users.append(t.split(':')[1])
            else:
                tokens_standard.append(t)

        # Speaker patterns: nick characters only, matched as substrings
        from_users = ['%' + sanitize_speaker(t) + '%' for t in from_users]
        # Escaped tokens are reused by the template for highlighting
        tokens_orig = [re_escape(t) for t in tokens_standard]
        tokens_formed = ['%' + t + '%' for t in tokens_orig]

        # Query is usable; perform the search on DB and get the finds
        if from_users == []:
            searchres = query_db(
                '''select * from loglines where chan=%s
                   and payload ilike all(%s) order by idx desc limit %s;''',
                [chan,
                 tokens_formed,
                 Max_Search_Results], one=False)
        else:
            searchres = query_db(
                '''select * from loglines where chan=%s
                   and speaker ilike any(%s)
                   and payload ilike all(%s) order by idx desc limit %s;''',
                [chan,
                 from_users,
                 tokens_formed,
                 Max_Search_Results], one=False)

        # Number of entries found
        nres = len(searchres)
        search_head = "<b>{0}</b> entries found in {1} for <b>'{2}'</b> :".format(
            nres, chan, html_escape(query))

    # No paging support just yet:
    return render_template('searchres.html',
                           query = query,
                           nres = nres,
                           chan = chan,
                           search_head = search_head,
                           tokens = tokens_orig,
                           loglines = searchres)
1191
1192
1193 # Comment this out if you don't have one
@app.route('/favicon.ico')
def favicon():
    """Redirect /favicon.ico to the favicon.ico in the static folder."""
    icon_url = url_for('static', filename='favicon.ico')
    return redirect(icon_url)
1197
1198
1199 ## App Mode
if __name__ == '__main__':
    # Run directly as a script: start the Flask server, threaded, on the
    # port read from the config file (www_port).
    app.run(threaded=True, port=WWW_Port)