2016-11-11 18:44:29 +00:00
# NOTE: cgi is deprecated (PEP 594) and removed in Python 3.13
import cgi
import configparser
import enum
import html
import ipaddress
import os
import pathlib
import select
import socket
import stat
import subprocess
import sys
import threading
import time
import urllib.parse
2016-07-12 14:11:58 +00:00
2016-08-01 21:04:50 +00:00
class default_config:
    """Namespace for the server's default settings; attributes are attached below."""


# Path.home() honours $HOME when set and otherwise consults the password
# database, so an unset HOME does not abort the import with a KeyError
default_config.blacklist_file = pathlib.Path.home() / 'gopher_blacklist'
default_config.charset = 'utf-8'
default_config.fallback_mimetype = 'application/octet-stream'
default_config.gopher_root = pathlib.Path.home() / 'gopher'
default_config.max_threads = 8192
default_config.port = 7070
default_config.recognised_itemtypes = ['0', '1', '5', '9', 'g', 'h', 'I', 's']
default_config.request_max_size = 8192
default_config.socket_timeout = 20
# Paths that may be requested over HTTP without an item type prefix
default_config.no_itemtype_whitelist = {'robots.txt', 'favicon.ico'}
# Template for hURL redirects; __raw_url__ and __escaped_url__ are substituted by hurl_redirect()
default_config.hurl_redirect_page = """<!DOCTYPE html>
<html>
<head>
<meta http-equiv="refresh" content="2; url=__raw_url__"/>
<title>Redirecting to __escaped_url__</title>
<style>body { max-width: 70ch; margin: auto; }</style>
</head>
<body>
<p>Your gopher client doesn't support the hURL specification. If you are not redirected after 2s, click the link.</p>
<p>Redirecting to <a href="__raw_url__">__escaped_url__</a></p>
</body>
</html>"""
2016-07-12 14:11:58 +00:00
# error(message)
# Print error message to stderr
def error(message):
    """Write a timestamped error line, prefixed with the program name, to stderr."""
    program_name = os.path.basename(sys.argv[0])
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print('%s: %s Error: %s' % (program_name, timestamp, message), file=sys.stderr)
    # Flush explicitly so the message is not held back in a buffer
    sys.stderr.flush()
2016-07-12 14:11:58 +00:00
2016-08-01 21:20:19 +00:00
# die(message, status = 1) → (Never returns)
# Print error message to stderr and exit with status code
def die(message, status=1):
    """Report message through error() and terminate via sys.exit(status)."""
    error(message)
    sys.exit(status)
2016-08-05 13:18:33 +00:00
# log(message)
# Print a log message to stdout
def log(message):
    """Write a timestamped log line, prefixed with the program name, to stdout."""
    program_name = os.path.basename(sys.argv[0])
    timestamp = time.strftime('%Y-%m-%d %H:%M:%S')
    print('%s: %s %s' % (program_name, timestamp, message))
    # Flush explicitly so the line is not held back in a buffer
    sys.stdout.flush()
2016-08-05 13:18:33 +00:00
2016-08-01 22:14:19 +00:00
# A base for Exceptions that are used with one argument and that return a string that incorporates said argument
class OneArgumentException(Exception):
    """Exception whose message is the class attribute `text` formatted with one argument.

    Subclasses override `text` with a %-format string containing a single %s.
    """

    # Fallback so that str() also works on the base class itself
    text = '%s'

    def __init__(self, argument):
        # Hand the argument to Exception so that .args and repr() carry it
        super().__init__(argument)
        self.argument = argument

    def __str__(self):
        # Wrap in a tuple so that a tuple-valued argument is formatted as one value
        return self.text % (self.argument,)
2016-08-02 00:27:38 +00:00
class UnreachableException(Exception):
    """Raised by unreachable() when a codepath marked as impossible is executed."""

    def __str__(self):
        return 'Declared unreachable'
# unreachable() → (Never returns)
# Used to mark a codepath that should never execute
def unreachable():
    """Raise UnreachableException unconditionally."""
    raise UnreachableException()
2016-07-12 14:11:58 +00:00
# bind(port, backlog = 1) → [sockets...]
# Binds to all available (TCP) interfaces on specified port and returns the sockets
# backlog controls how many connections allowed to wait handling before system drops new ones
def bind(port, backlog=1):
    """Return a list of listening TCP sockets, one per address family that could be bound.

    Addresses that cannot be opened, configured, bound or listened on are
    skipped, so the returned list may be empty.
    """
    # Based on code in https://docs.python.org/3/library/socket.html
    sockets = []
    addresses = socket.getaddrinfo(None, port, socket.AF_UNSPEC, socket.SOCK_STREAM, 0, socket.AI_PASSIVE)
    for af, socktype, proto, _canonname, sa in addresses:
        try:
            s = socket.socket(af, socktype, proto)
        except OSError:
            continue

        try:
            # Make IPv6 socket only bind on IPv6 address, otherwise may clash with IPv4 and not get enabled
            if af == socket.AF_INET6:
                try:
                    s.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
                except OSError:
                    # Best effort only; binding is still attempted
                    pass

            # Set SO_REUSEADDR for less painful server restarting
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

            s.bind(sa)
            s.listen(backlog)
        except OSError:
            # Close the socket on any failure so that it is not leaked
            s.close()
            continue

        sockets.append(s)

    return sockets
# drop_privileges()
# Drops set[ug]id, die()s if unsuccessful
def drop_privileges():
    """Set real, effective and saved group and user ids to the real gid/uid.

    Calls die() when the ids cannot be changed or when the platform does not
    provide os.setresgid/os.setresuid.
    """
    try:
        uid = os.getuid()
        gid = os.getgid()
        # Group first: once the uid is dropped the gid may no longer be changeable
        os.setresgid(gid, gid, gid)
        os.setresuid(uid, uid, uid)
    except (OSError, AttributeError):
        die('Unable to drop privileges')
2016-08-02 00:27:38 +00:00
class CommandError(OneArgumentException):
    """Raised by the reader generators when they are sent an unknown command."""

    text = 'Error with command: %s'
2020-05-28 17:43:36 +00:00
class SocketTimeoutError(Exception):
    """Raised by SocketReader when receiving from the socket times out."""
2016-08-02 00:27:38 +00:00
2016-08-02 17:47:51 +00:00
class ReaderCommands(enum.Enum):
    """Commands that can be sent into a reader generator via .send()."""

    # Ask the reader to finish and return the data still in its buffer
    stop = 0
# SocketReader(sock) → <SocketReader instance>
# next(<SocketReader instance>) → byte_of_data
# Wraps a socket and exposes it as per-byte iterator. Does not close the socket when it exits
def SocketReader(sock):
    """Generator yielding the bytes received from sock one at a time, as ints.

    Sending ReaderCommands.stop ends the generator; its return value (carried
    by StopIteration) is the unread rest of the current chunk. Any other sent
    value raises CommandError. A socket timeout raises SocketTimeoutError.
    """
    chunk = b''
    while True:
        for index, byte in enumerate(chunk):
            command = yield byte
            if command is not None:
                if command == ReaderCommands.stop:
                    # Return the rest of data in buffer
                    return chunk[index + 1:]
                raise CommandError('%s not recognised' % repr(command))

        try:
            chunk = sock.recv(1024)
        except socket.timeout as err:
            raise SocketTimeoutError('Remote end timed out') from err

        # An empty read means the remote end closed the connection
        if not chunk:
            break
2016-08-02 17:47:51 +00:00
# FileReader(file) → <FileReader instance>
# next(<FileReader instance>) → byte_of_data
# Wraps a bytefile object and exposes it as per-byte iterator. Does not close the file when it exits
def FileReader(file):
    """Generator yielding the bytes read from a binary file object one at a time, as ints.

    Sending ReaderCommands.stop ends the generator; its return value (carried
    by StopIteration) is the unread rest of the current chunk. Any other sent
    value raises CommandError.
    """
    chunk = b''
    while True:
        for index, byte in enumerate(chunk):
            command = yield byte
            if command is not None:
                if command == ReaderCommands.stop:
                    # Return the rest of data in buffer
                    return chunk[index + 1:]
                raise CommandError('%s not recognised' % repr(command))

        chunk = file.read(1024)
        # An empty read means end of file
        if not chunk:
            break
# StringReader(string) → <StringReader instance>
# next(<StringReader instance>) → byte_of_data
# Wraps a unicode string in an interface like SocketReader or FileReader
def StringReader(string):
    """Generator yielding the UTF-8 encoding of string one byte at a time, as ints.

    Sending ReaderCommands.stop ends the generator; its return value (carried
    by StopIteration) is the not yet yielded rest of the encoded data. Any
    other sent value raises CommandError.
    """
    encoded = string.encode('utf-8')
    for index, byte in enumerate(encoded):
        command = yield byte
        if command is not None:
            if command == ReaderCommands.stop:
                # Return the rest of data
                return encoded[index + 1:]
            raise CommandError('%s not recognised' % repr(command))
2018-06-19 15:13:01 +00:00
# extract_itemtype_path(itemtype_path, *, config) → itemtype, path
# Extract itemtype and path components from a HTTP path
def extract_itemtype_path(itemtype_path, *, config):
    """Split a HTTP request path into (itemtype, path).

    The path is URL-unquoted and one leading slash is removed. An empty path
    has item type '1'; a path listed in config.no_itemtype_whitelist has item
    type None; otherwise the first character is the item type and the
    remainder is the path.
    """
    # URL unquote the path
    itemtype_path = urllib.parse.unquote(itemtype_path)

    if itemtype_path.startswith('/'):
        itemtype_path = itemtype_path[1:]

    if not itemtype_path:
        # / is by default of type 1
        return '1', itemtype_path

    if itemtype_path in config.no_itemtype_whitelist:
        # Have a whitelist for itemtypeless files
        return None, itemtype_path

    # Extract the itemtype
    return itemtype_path[0], itemtype_path[1:]
2016-07-12 14:11:58 +00:00
2016-08-01 22:33:02 +00:00
class PathError(OneArgumentException):
    """Raised when a request path is malformed."""

    text = 'Error with request path: %s'
# normalize_path(path, *, config) → normalized_path
# Normalize the path or raise an exception if the path is malformed
def normalize_path(path, *, config):
    """Return path with empty and '.' components removed and '..' resolved lexically.

    The result has no leading or trailing slash. Raises PathError when '..'
    would climb above the start of the path.
    """
    normalized_components = []
    for component in path.split('/'):
        if component in ('', '.'):
            # '' is a dummy left by // or / in beginning or end; foo/. = foo and ./bar = bar
            continue

        if component == '..':
            # foo/bar/.. = foo, drop last component
            # This equality does not always hold in a real unix system. However, there are two reasons these semantics are used
            # 1. Gopher has no concept of symlinks, and many clients have "parent directory" option that drops last component of path
            # 2. This allows for safe usage of symlinks in gopherroot to outside of it, rogue request can't escape to parent directory
            if not normalized_components:
                # Attempted .. on an empty path, means attempting to point outside gopherroot
                raise PathError('Path points outside gopherroot')
            normalized_components.pop()
            continue

        # A normal path component, add to the normalized path
        normalized_components.append(component)

    return '/'.join(normalized_components)
2016-08-01 22:14:19 +00:00
class RequestError(OneArgumentException):
    """Raised when a request cannot be read or is malformed."""

    text = 'Error when handling request: %s'
class EmptyRequestError(OneArgumentException):
    """Raised when the connection ends or times out before any request byte arrived."""

    text = 'Got an empty request: %s'
2016-08-01 22:14:19 +00:00
2016-08-02 00:27:38 +00:00
class Protocol(enum.Enum):
    """Protocols a request can arrive in."""

    gopher, gopherplus, http = range(3)
2018-06-16 18:48:35 +00:00
# _read_request(sockreader, *, config) → request, protocol, just_headers
# Read bytes until the end of the request, detecting HTTP versus Gopher on the way
def _read_request(sockreader, *, config):
    protocol = None
    just_headers = False
    request = bytearray()
    while True:
        try:
            request.append(next(sockreader))
        except StopIteration:
            # Other end hung up before sending a full header
            if len(request) == 0:
                raise EmptyRequestError('Remote end hung up unexpectedly')
            raise RequestError('Remote end hung up unexpectedly')
        except SocketTimeoutError as err:
            if len(request) == 0:
                raise EmptyRequestError('Remote end timed out') from err
            raise

        if len(request) >= config.request_max_size:
            raise RequestError('Request too long')

        # We have enough data to recognise a HTTP request
        if protocol is None and len(request) >= 5:
            if request.startswith(b'GET') and request[3] in b' \r\t':
                # Looks like a HTTP GET request
                protocol = Protocol.http
            elif request.startswith(b'HEAD') and request[4] in b' \r\t':
                # Looks like a HTTP HEAD request, we'll only return the headers
                protocol = Protocol.http
                just_headers = True
            else:
                protocol = Protocol.gopher

        # End of line reached before a HTTP GET or HEAD request found, mark Gopher as protocol
        if protocol is None and request.endswith(b'\n'):
            protocol = Protocol.gopher

        # HTTP ends with twice CR+LF; twice LF is malcompliant but supported anyways
        if protocol == Protocol.http and (request.endswith(b'\r\n\r\n') or request.endswith(b'\n\n')):
            break

        # Gopher ends with CR+LF; a lone LF is malcompliant but supported anyways
        if protocol == Protocol.gopher and request.endswith(b'\n'):
            break

    return request, protocol, just_headers

# _parse_http_request(request, just_headers, *, config) → path, protocol, itemtype, just_headers, useragent
# Pull the requested path and the user agent out of a complete HTTP request
def _parse_http_request(request, just_headers, *, config):
    whitespace = b' \r\n\t'
    length = len(request)
    # Start after GET/HEAD
    index = 4 if just_headers else 3
    # Skip whitespace
    while index < length and request[index] in whitespace:
        index += 1
    path_start = index
    # Skip until next whitespace (end of requested path)
    while index < length and request[index] not in whitespace:
        index += 1
    path_end = index

    # extract_itemtype_path() does the URL unquoting; unquoting here as well
    # would decode the path twice (%2541 would end up as A instead of %41)
    itemtype_path = request[path_start:path_end].decode('utf-8')
    itemtype, path = extract_itemtype_path(itemtype_path, config=config)

    # Try to extract user agent (if the header is repeated, the last one is used)
    useragent = None
    ua_string = b'user-agent:'
    for line in request.split(b'\n'):
        if line.lower().startswith(ua_string):
            value = line[len(ua_string):]
            try:
                useragent = value.decode('utf-8')
            except UnicodeDecodeError:
                # latin-1 can decode any byte sequence
                useragent = value.decode('latin-1')
            useragent = useragent.strip()

    return (path, Protocol.http, itemtype, just_headers, useragent)

# _parse_gopher_request(request) → path, protocol, *rest
# Pull the selector and the optional Gopher+ field out of a complete Gopher request
def _parse_gopher_request(request):
    protocol = Protocol.gopher
    rest = ()
    length = len(request)
    index = 0
    # Seek until either end of line or a tab (field separator)
    while index < length and request[index] not in b'\t\r\n':
        index += 1
    path = request[:index].decode('utf-8')

    # If another field was present, check to see if it marks a Gopher+ request
    if index < length and request[index] == ord('\t'):
        index += 1
        field_start = index
        # Look until end of line
        while index < length and request[index] not in b'\r\n':
            index += 1
        field = request[field_start:index].decode('utf-8')
        # We recognise these as signalling a Gopher+ request
        if field[:1] in ('+', '!', '$'):
            # It was Gopher+, update protocol value and stash the field into rest
            protocol = Protocol.gopherplus
            rest = (field,)

    return (path, protocol) + rest

# get_request(sockreader, *, config) → path, protocol, *rest
# Read request from the byte reader and parse it.
# path is the requested path, protocol is Protocol.gopher, Protocol.gopherplus or Protocol.http depending on the request
# rest is protocol-dependent information:
#  HTTP: itemtype, just_headers, useragent
#  Gopher+: the field that followed the selector
#  Gopher: nothing
def get_request(sockreader, *, config):
    """Read one request from sockreader and return (path, protocol, *rest).

    Raises EmptyRequestError when the connection ends or times out before any
    byte arrived, RequestError when it ends mid-request or the request reaches
    config.request_max_size bytes, and SocketTimeoutError on a timeout
    mid-request.
    """
    request, protocol, just_headers = _read_request(sockreader, config=config)

    if protocol == Protocol.http:
        return _parse_http_request(request, just_headers, config=config)
    elif protocol == Protocol.gopher:
        return _parse_gopher_request(request)
    else:
        unreachable()
2016-07-12 14:11:58 +00:00
2016-08-02 02:38:48 +00:00
# Directories whose .filesinfo has already been read
infofiles_cached = set()
infofiles_cached_lock = threading.Lock()

# read_infofile(file_path)
# Reads into caches the contents of .filesinfo file at same directory as file_path
def read_infofile(file_path):
    """Load the mimetype entries of the .filesinfo next to file_path into mimetype_cache.

    Each directory is read once: the directory, not the individual file, is
    recorded in infofiles_cached. A missing .filesinfo is not an error.
    """
    directory = file_path.parent

    with infofiles_cached_lock:
        if directory in infofiles_cached:
            return

    infofile = configparser.ConfigParser()
    # ConfigParser.read() silently skips files that can not be opened
    infofile.read(str(directory / '.filesinfo'))

    # Each section is named after a file in the directory
    for file in infofile.sections():
        if 'mimetype' in infofile[file]:
            with mimetype_cache_lock:
                mimetype_cache[directory / file] = infofile[file]['mimetype']

    with infofiles_cached_lock:
        infofiles_cached.add(directory)
# Mapping from file extension (with the leading dot) to mimetype
# TODO: Read from file
extension_mimetypes = {'.txt': 'text/plain', '.text': 'text/plain', '.log': 'text/plain', '.html': 'text/html'}
# Mapping from path to mimetype, guarded by mimetype_cache_lock
mimetype_cache = {}
mimetype_cache_lock = threading.Lock()
# get_mimetype(full_path, *, config) → mimetype
# Return the mime type of given file
def get_mimetype(full_path, *, config):
    """Return the mimetype for full_path and remember it in mimetype_cache.

    Sources in order of precedence: the cache (which read_infofile() fills
    from .filesinfo), the special file name 'gophermap', the file extension
    (matched case-insensitively), and finally config.fallback_mimetype.
    """
    # Look at the information file in the same directory
    read_infofile(full_path)

    # Try looking up from cache
    with mimetype_cache_lock:
        mimetype = mimetype_cache.get(full_path)
    if mimetype is not None:
        return mimetype

    if full_path.name == 'gophermap':
        mimetype = 'text/x-gophermap'
    else:
        # Try extension, use fallback if it is not known
        mimetype = extension_mimetypes.get(full_path.suffix.lower(), config.fallback_mimetype)

    # Write into the cache
    with mimetype_cache_lock:
        mimetype_cache[full_path] = mimetype

    return mimetype
# get_full_path(path, *, config) → full_path
# Figure out full path for the file
def get_full_path(path, *, config):
    """Return config.gopher_root / path, or the gophermap inside it when that is a directory.

    os.stat() is used on purpose: it raises FileNotFoundError for a path that
    does not exist.
    """
    full_path = config.gopher_root / path

    # If it's a directory, use the gophermap file in said directory instead
    if stat.S_ISDIR(os.stat(str(full_path)).st_mode):
        full_path = full_path / 'gophermap'

    return full_path
2016-08-02 17:09:01 +00:00
class Status:
    """Outcome of a request, translated into protocol specific headers by send_header()."""

    ok, notfound, error, badrequest = range(4)
2016-08-02 17:09:01 +00:00
# is_text_from_mimetype(mimetype) → is_text
# A simple "is this data text" heuristic
def is_text_from_mimetype(mimetype):
    """Return True when the part of mimetype before the first '/' is exactly 'text'."""
    major_type, _, _ = mimetype.partition('/')
    return major_type == 'text'
2016-08-02 17:47:51 +00:00
# send_header(sock, protocol, status, mimetype, *, config)
# Send a header that matches the provided information
def send_header(sock, protocol, status, mimetype, *, config):
    """Send the protocol specific response header for status and mimetype over sock.

    HTTP gets a status line and a Content-type header, Gopher+ gets its
    one-line status header, and plain Gopher gets nothing. An unknown protocol
    or status ends in unreachable().
    """
    is_text = is_text_from_mimetype(mimetype)

    if protocol == Protocol.http:
        # We translate gophermaps into HTML, so send HTML mimetype
        if mimetype == 'text/x-gophermap':
            content_type = b'Content-type: text/html'
        else:
            content_type = b'Content-type: ' + mimetype.encode('utf-8')

        # Add character set encoding information if we are transmitting text
        if is_text:
            content_type += ('; charset=%s' % config.charset).encode('utf-8')

        if status == Status.ok:
            statusline = b'HTTP/1.1 200 OK'
        elif status == Status.notfound:
            statusline = b'HTTP/1.1 404 Not Found'
        elif status == Status.error:
            statusline = b'HTTP/1.1 500 Internal Server Error'
        elif status == Status.badrequest:
            statusline = b'HTTP/1.1 400 Bad Request'
        else:
            unreachable()

        header = statusline + b'\r\n' + content_type + b'\r\n\r\n'

    elif protocol == Protocol.gopherplus:
        if status == Status.ok:
            # Gopher has two ways to transmit data of unknown size, text (+-1) and binary (+-2)
            header = b'+-1\r\n' if is_text else b'+-2\r\n'
        elif status == Status.notfound:
            header = b'--1\r\n'
        elif status == Status.error:
            # Technically -2 means "Try again later", but there is no code for "server blew up"
            header = b'--2\r\n'
        elif status == Status.badrequest:
            # Technically -1 means "File not found", but there is no code for "bad request"
            header = b'--1\r\n'
        else:
            unreachable()

    elif protocol == Protocol.gopher:
        # Gopher has no header
        header = b''

    else:
        unreachable()

    sock.sendall(header)
2016-08-02 17:09:01 +00:00
2016-08-02 17:47:51 +00:00
# send_binaryfile(sock, reader, protocol, *, config)
# Send the data in the given reader as binary
def send_binaryfile(sock, reader, protocol, *, config):
    """Send every byte yielded by reader over sock, unmodified, in chunks of at most 1024 bytes.

    protocol and config are accepted for signature parity with the other
    send_* functions and are not used.
    """
    buffer_max = 1024
    buffer = bytearray()
    for byte in reader:
        buffer.append(byte)
        if len(buffer) >= buffer_max:
            # Flush the full buffer and start a fresh one, so that memory use
            # stays bounded and no byte is sent twice
            sock.sendall(buffer)
            buffer = bytearray()

    # If there was something left in the buffer, flush it
    if buffer:
        sock.sendall(buffer)
# send_textfile(sock, reader, protocol, *, config)
# Send the data in the given reader, encoded correctly as text file
def send_textfile(sock, reader, protocol, *, config):
    """Send the bytes from reader as a text response for protocol.

    HTTP gets the data unmodified. Gopher and Gopher+ get each line terminated
    with CR+LF, a '.' at the start of a line doubled, and a final '.' line as
    the end marker.
    """
    if protocol == Protocol.http:
        # HTTP needs no additional encoding, send as binary
        send_binaryfile(sock, reader, protocol, config=config)

    elif protocol == Protocol.gopher or protocol == Protocol.gopherplus:
        line = bytearray()
        for byte in reader:
            if byte == ord('\n'):
                # Append \r\n to end of line, send it, and clear
                line.extend(b'\r\n')
                sock.sendall(line)
                line = bytearray()
            elif byte == ord('.') and len(line) == 0:
                # . in the beginning of line, needs to be quoted
                line.extend(b'..')
            else:
                # Add to the line
                line.append(byte)

        # If there was no terminating \n, flush the line buffer
        if len(line) != 0:
            line.extend(b'\r\n')
            sock.sendall(line)

        # Signal end of text
        sock.sendall(b'.\r\n')

    else:
        unreachable()
2018-01-27 17:34:11 +00:00
# html_encode(bytestring) → encoded_bytestring
# Makes bytestring usable as HTML text
def html_encode(bytestring):
    """Return bytestring with &, < and > replaced by their HTML entities.

    Quotes are left alone, so the result is meant for text content and not
    for attribute values.
    """
    # & has to go first, otherwise the & of the other entities would get escaped again
    return bytestring.replace(b'&', b'&amp;').replace(b'<', b'&lt;').replace(b'>', b'&gt;')
# send_gophermap(sock, reader, protocol, *, config)
# Send the gophermap in the given reader either as gophermap or HTML
def send_gophermap(sock, reader, protocol, *, config):
    """Send the gophermap from reader: as text for Gopher(+), translated into HTML for HTTP.

    In the HTML translation 'i' lines and lines with fewer than four
    tab-separated fields become plain text, every other line becomes a link.
    """
    if protocol == Protocol.gopher or protocol == Protocol.gopherplus:
        # Gopher(+) needs no additional translation, send as text
        send_textfile(sock, reader, protocol, config=config)

    elif protocol == Protocol.http:
        # Send header of the HTML file
        sock.sendall(b'<!DOCTYPE html>\n<html><head><title>Gophermap</title><style>body { max-width: 70ch; margin: auto; }</style></head><body><p>\n')

        lines = bytes(bytearray(reader)).split(b'\n')
        # A terminating \n (or an empty file) leaves an empty last element, it's not a line
        if lines[-1] == b'':
            lines.pop()

        for line in lines:
            # Gophermaps may use CR+LF line ends, don't let the CR end up in the last field
            line = line.rstrip(b'\r')
            # Split into components
            fields = line.split(b'\t')
            itemtype = fields[0][0:1]
            name = fields[0][1:]

            if itemtype == b'i':
                # Text
                sock.sendall(html_encode(name) + b'<br/>\n')
                continue

            if len(fields) < 4:
                # Not a complete item line (e.g. an empty line), show it as is instead of failing
                sock.sendall(html_encode(line) + b'<br/>\n')
                continue

            # Link
            # TODO: Figure out a heuristic when to pick http:// and when to pick gopher://
            path, server, port = fields[1:4]
            url = b'http://' + server
            # If port is 70, don't include the port part. This allows interoperability with Idigna
            if port != b'70':
                url += b':' + port
            url += b'/' + itemtype + urllib.parse.quote_from_bytes(path).encode('utf-8')
            # The URL lands inside a double quoted attribute, so " needs escaping too
            href = html_encode(url).replace(b'"', b'&quot;')
            sock.sendall(b'<a href="' + href + b'">' + html_encode(name) + b'</a><br/>\n')

        # Send footer of the HTML file
        sock.sendall(b'</p></body></html>')

    else:
        unreachable()
2016-08-02 17:47:51 +00:00
# send_file(sock, reader, protocol, mimetype, *, config)
# Send data from reader over the socket with right encoding for the mimetype
def send_file(sock, reader, protocol, mimetype, *, config):
    """Dispatch to send_gophermap(), send_textfile() or send_binaryfile() based on mimetype."""
    if mimetype == 'text/x-gophermap':
        # Send as gophermap (possibly translated into HTML)
        send_gophermap(sock, reader, protocol, config=config)
    elif is_text_from_mimetype(mimetype):
        # Send as text
        send_textfile(sock, reader, protocol, config=config)
    else:
        # Send as binary file
        send_binaryfile(sock, reader, protocol, config=config)
2016-08-02 17:09:01 +00:00
2016-08-02 22:13:45 +00:00
# test_is_cgi(full_path, *, config) → is_cgi
# Tests whether file associated with full_path is CGI
def test_is_cgi(full_path, *, config):
    """Return True when the current process may execute full_path (checked with os.access)."""
    # Assume anything runnable is CGI
    return os.access(str(full_path), os.X_OK)
# get_file(full_path, *, config)
# Get a file object that can be passed to FileReader, either of file's contents or CGI's output
def get_file(full_path, *, config):
    """Return a binary file object: the stdout pipe of the CGI at full_path, or the opened file.

    The caller is responsible for closing the returned object.
    """
    if test_is_cgi(full_path, config=config):
        # Run CGI and use its output
        # NOTE(review): only the pipe is handed back, so nothing here waits
        # for the child process to exit
        proc = subprocess.Popen([str(full_path)], stdout=subprocess.PIPE)
        return proc.stdout

    # Open file in binary mode
    return open(str(full_path), 'rb')
2016-11-11 18:44:29 +00:00
# is_hurl_path(path_raw) → is_hurl
# Returns whether the path is a hURL redirect
def is_hurl_path(path_raw):
    """Return True when path_raw starts with the hURL prefix 'URL:'."""
    return path_raw.startswith('URL:')
2016-11-11 18:50:44 +00:00
# hurl_redirect(url_raw, *, config) → redirect_page
# Return a HTML page for hURL redirect
def hurl_redirect(url_raw, *, config):
    """Fill config.hurl_redirect_page with url_raw and return the resulting HTML.

    Both the __raw_url__ and the __escaped_url__ placeholder receive the URL
    with &, <, > and quotes escaped. __raw_url__ sits inside double quoted
    attributes in the template, so inserting the URL verbatim would let a
    quote in it break out of the attribute; browsers decode the entities back
    to the original URL.
    """
    # html.escape replaces cgi.escape, which was removed in Python 3.8
    url_escaped = html.escape(url_raw, quote=True)
    return config.hurl_redirect_page.replace('__raw_url__', url_escaped).replace('__escaped_url__', url_escaped)
2016-07-12 14:11:58 +00:00
# Worker thread implementation
class Serve(threading.Thread):
    """Thread that reads one request from a connection, answers it and closes the socket.

    controller is told through controller.thread_end() when the thread
    finishes; address is only used in log messages.
    """

    def __init__(self, controller, sock, address, config):
        super().__init__()
        self.controller = controller
        self.sock = sock
        self.address = address
        self.config = config

    # ._send_response(protocol, status, mimetype, reader, just_headers)
    # Send the header and, unless only headers were asked for, the body from reader
    def _send_response(self, protocol, status, mimetype, reader, just_headers):
        send_header(self.sock, protocol, status, mimetype, config=self.config)
        if not just_headers:
            send_file(self.sock, reader, protocol, mimetype, config=self.config)

    def handle_request(self):
        """Read the request from the socket and send the matching response.

        When producing the response fails, an error response is attempted and
        the exception is re-raised.
        """
        sockreader = SocketReader(self.sock)

        path_raw, protocol, *rest = get_request(sockreader, config=self.config)
        just_headers = False
        itemtype = None
        useragent = None
        if protocol == Protocol.http:
            itemtype, just_headers, useragent = rest

        try:
            if protocol == Protocol.http and itemtype is not None and itemtype not in self.config.recognised_itemtypes:
                # If we don't recognize the requested itemtype, signal that it was a bad request
                log('%s [%s] requested path %s with bad itemtype %s' % (self.address, protocol.name, path_raw, itemtype))
                reader = StringReader('%s not recognized as an item type\n\nRecognized ones are %s\n\nThe correct URL syntax is http://server:port/0/textfile\n' % (itemtype, ', '.join(self.config.recognised_itemtypes)))
                self._send_response(protocol, Status.badrequest, 'text/plain', reader, just_headers)

            elif is_hurl_path(path_raw):
                # Everything after the 'URL:' prefix is the target
                url_raw = path_raw[4:]
                log('%s [%s] hURL %s' % (self.address, protocol.name, url_raw))
                reader = StringReader(hurl_redirect(url_raw, config=self.config))
                self._send_response(protocol, Status.ok, 'text/html', reader, just_headers)

            else:
                path = normalize_path(path_raw, config=self.config)
                try:
                    full_path = get_full_path(path, config=self.config)
                    mimetype = get_mimetype(full_path, config=self.config)
                    file = get_file(full_path, config=self.config)
                except (FileNotFoundError, NotADirectoryError):
                    # NotADirectoryError: a component in the middle of the path is a regular file
                    log('%s [%s] requested path not found %s' % (self.address, protocol.name, path_raw))
                    reader = StringReader('%s not found\n\nMake sure you included the item type in the URL\n' % path_raw)
                    self._send_response(protocol, Status.notfound, 'text/plain', reader, just_headers)
                else:
                    log('%s [%s] requested path %s' % (self.address, protocol.name, path_raw))
                    try:
                        self._send_response(protocol, Status.ok, mimetype, FileReader(file), just_headers)
                    finally:
                        # Close the file even when sending fails midway
                        file.close()

        except BaseException:
            try:
                reader = StringReader('Internal server error\n')
                self._send_response(protocol, Status.error, 'text/plain', reader, just_headers)
            except OSError:
                # The connection is likely gone; don't let this hide the original exception
                pass
            raise

        if protocol == Protocol.http:
            log('User agent: %s' % useragent)

    def run(self):
        """Thread entry point: handle the request, then always close the socket and notify the controller."""
        try:
            self.handle_request()
        except EmptyRequestError:
            # No real reason to log these, would get kinda spammy if we did due to mobile Chrome liking to cause these
            pass
        except BaseException as err:
            # Catch and log exceptions instead of letting to crash, as we need to update the worker thread count on abnormal exit as well
            error('Worker thread (%s) died with: %s' % (self.address, err))
        finally:
            self.sock.close()
            self.controller.thread_end()
2016-08-02 00:27:38 +00:00
2016-08-01 21:30:46 +00:00
class Threads_controller:
    """Keeps count of the running worker threads and enforces config.max_threads."""

    def __init__(self):
        self.threads_amount = 0
        # Guards threads_amount
        self.threads_lock = threading.Lock()

    # .spawn_thread(sock, address, config)
    # Spawn a new thread to serve a connection if possible, do nothing if not
    def spawn_thread(self, sock, address, config):
        """Start a Serve thread for sock, or log an error and close sock when that is not possible."""
        # See if we can spawn a new thread. If not, log an error, close the socket and return. If yes, increment the amount of threads running
        with self.threads_lock:
            if self.threads_amount >= config.max_threads:
                error('Could not serve a request from %s, worker thread limit exhausted' % address)
                sock.close()
                return
            self.threads_amount += 1

        # Spawn a new worker thread
        try:
            Serve(self, sock, address, config).start()
        except RuntimeError as err:
            # The thread never ran, so it will never call thread_end() or close the socket
            with self.threads_lock:
                self.threads_amount -= 1
            sock.close()
            error('Could not serve a request from %s, unable to start worker thread: %s' % (address, err))

    # .thread_end()
    # Called from worker thread to signal it's exiting
    def thread_end(self):
        """Decrement the count of running worker threads."""
        with self.threads_lock:
            self.threads_amount -= 1
2016-07-12 14:11:58 +00:00
2016-08-05 13:18:33 +00:00
class IPParseError(OneArgumentException):
    """Raised when an IP address or network can not be parsed."""

    text = 'Error parsing IP: %s'
# read_blacklist(blacklist_file) → blacklist
# Reads the contents of the blacklist file into a form usable by ip_in_ranges()
def read_blacklist(blacklist_file):
    """Return the networks listed in blacklist_file as ipaddress network objects.

    The file has one address or network per line; '#' starts a comment and
    empty lines are ignored. A missing file gives an empty list. Raises
    IPParseError for a line that ipaddress.ip_network() rejects.
    """
    try:
        with open(str(blacklist_file), 'r') as file:
            lines = file.read().split('\n')
    except FileNotFoundError:
        return []

    blacklist = []
    for line in lines:
        # Comment handling and removal of surrounding whitespace
        line = line.split('#', 1)[0].strip()
        # If an empty line, skip
        if line == '':
            continue

        try:
            ip_range = ipaddress.ip_network(line)
        except ValueError as err:
            raise IPParseError('Invalid format: ' + line) from err

        blacklist.append(ip_range)

    return blacklist
# ip_in_ranges(ip, ip_ranges) → in_ranges
# Checks whether an ip address is in given ranges
def ip_in_ranges(ip, ip_ranges):
    """Return True when ip lies inside at least one of the networks in ip_ranges.

    ip is anything ipaddress.ip_address() accepts; raises IPParseError when
    it can not be parsed.
    """
    try:
        address = ipaddress.ip_address(ip)
    except ValueError as err:
        # Report the offending value itself
        raise IPParseError('Invalid format: ' + str(ip)) from err

    return any(address in ip_range for ip_range in ip_ranges)
2016-08-01 21:20:19 +00:00
# listen(config) → (Never returns)
# Binds itself to all interfaces on designated port and listens on incoming connections
# Spawns worker threads to handle the connections
def listen(config):
    """Accept connections on config.port forever, handing each one to a worker thread.

    Connections from addresses in the blacklist file are closed right away.
    die()s when no socket could be bound or a listening socket reports
    something other than readability.
    """
    # Get sockets that we listen to
    listening_sockets = bind(config.port)

    # Drop privileges, we don't need them after this
    drop_privileges()

    # If we got no sockets to listen to, die
    if not listening_sockets:
        die('Could not bind to port %i' % config.port)

    # Create a poll object for the listening sockets and a fd→socket map
    listening = select.poll()
    sock_by_fd = {}
    for s in listening_sockets:
        listening.register(s, select.POLLIN)
        sock_by_fd[s.fileno()] = s
    del listening_sockets

    # Create a controller object for the worker threads
    threads_controller = Threads_controller()

    # Read blacklist of addresses
    blacklist = read_blacklist(config.blacklist_file)

    while True:
        # Wait for listening sockets to get activity
        events = listening.poll()

        for fd, event in events:
            # An explicit check instead of assert, as asserts are stripped under -O
            if event != select.POLLIN:
                die('Unexpected poll event %i on a listening socket' % event)

            # Get socket from table established previously
            s = sock_by_fd[fd]

            # Accept and handle the connection
            try:
                conn, addr = s.accept()
            except OSError as err:
                # E.g. the connection got aborted or we are out of file descriptors; keep serving
                error('Could not accept a connection: %s' % err)
                # Don't spin at full speed if the condition persists
                time.sleep(0.1)
                continue

            # Check if connection is from a blacklisted IP address
            try:
                blacklisted = ip_in_ranges(addr[0], blacklist)
            except IPParseError as err:
                # We can't tell whether the address is blacklisted, so refuse it
                conn.close()
                error('Dropped a connection: %s' % err)
                continue

            if blacklisted:
                # It was, skip event
                conn.close()
                log('Connection from blacklisted address %s' % addr[0])
                continue

            # Set timeout for socket
            conn.settimeout(config.socket_timeout)

            threads_controller.spawn_thread(conn, addr[0], config)
2016-08-01 21:04:50 +00:00
# Start the server with the default configuration when run as a script
if __name__ == '__main__':
    listen(default_config)