#VERSION: 0.22
#AUTHORS: Bugsbringer (dastins193@gmail.com)

EMAIL = "YOUR_EMAIL"
PASSWORD = "YOUR_PASSWORD"
ENABLE_PEERS_INFO = True
SITE_URL = "https://www.lostfilm.tv"

proxy = {
    'enable': False,
    'proxy_urls': {
        'http': 'ip:port',
        'https': 'ip:port'
    },
    'auth': False,
    'username': '',
    'password': ''
}
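
# Example of filling in the settings above (all values are placeholders,
# not real credentials or a real proxy):
#   EMAIL = "user@example.com"
#   PASSWORD = "secret"
#   proxy = {
#       'enable': True,
#       'proxy_urls': {'http': '127.0.0.1:3128', 'https': '127.0.0.1:3128'},
#       'auth': False,
#       'username': '',
#       'password': ''
#   }
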
import concurrent.futures
import hashlib
import json
import logging
import os
import re
from collections import OrderedDict
from datetime import datetime
from html.parser import HTMLParser
from http.cookiejar import CookieJar
from io import BytesIO
from time import time
from urllib import parse, request
from novaprinter import prettyPrinter
STORAGE = os.path.abspath(os.path.dirname(__file__))
is_main = __name__ == '__main__'
# logging
log_config = {
    'level': 'DEBUG' if is_main else 'ERROR',
    'format': '[%(asctime)s] %(levelname)s:%(name)s:%(funcName)s - %(message)s',
    'datefmt': '%d-%b-%y %H:%M:%S'
}
if not is_main:
    log_config.update({'filename': os.path.join(STORAGE, 'lostfilm.log')})

logging.basicConfig(**log_config)
logger = logging.getLogger('lostfilm')
logger.setLevel(logging.WARNING)


class lostfilm:
    url = SITE_URL
    name = 'LostFilm'
    supported_categories = {'all': '0'}

    search_url_pattern = SITE_URL + '/search/?q={what}'
    serial_url_pattern = SITE_URL + '{href}/seasons'
    download_url_pattern = SITE_URL + '/v_search.php?a={code}'
    season_url_pattern = SITE_URL + '{href}/season_{season}'
    episode_url_pattern = SITE_URL + '{href}/season_{season}/episode_{episode}/'
    additional_url_pattern = SITE_URL + '{href}/additional/episode_{episode}/'
    new_url_pattern = SITE_URL + '/new/page_{page}/type_{type}'

    # season 999 marks "additional materials", episode 999 marks a whole-season torrent
    additional_season = 999
    all_episodes = 999

    peer_id = '-PC0001-' + str(time()).replace('.', '')[-12:]

    datetime_format = '%d.%m.%Y'
    units_dict = {"ТБ": "TB", "ГБ": "GB", "МБ": "MB", "КБ": "KB", "Б": "B"}

    def __init__(self, output=True):
        self.output = output
        self.session = Session()

    def search(self, what, cat='all'):
        logger.info(what)

        self.torrents_count = 0
        self.prevs = set()
        self.old_seasons = dict()

        if not self.session.is_actual:
            self.pretty_printer({
                'link': 'Error',
                'name': self.session.error,
                'size': "0",
                'seeds': -1,
                'leech': -1,
                'engine_url': self.url,
                'desc_link': self.url
            })
            return False

        if parse.unquote(what).startswith('@'):
            params = parse.unquote(what)[1:].split(':')
            if params:
                if params[0] == 'fav':
                    self.get_fav()
                elif params[0] == 'new':
                    if len(params) == 1:
                        self.get_new()
                    elif len(params) == 2 and params[1] == 'fav':
                        self.get_new(fav=True)
        else:
            try:
                url = self.search_url_pattern.format(what=request.quote(what))
                search_result = self.session.request(url)
            except Exception as exp:
                logger.error(exp)
            else:
                serials_tags = Parser(search_result).find_all('div', {'class': 'row-search'})
                if serials_tags:
                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        for serial_href in (serial.a['href'] for serial in serials_tags):
                            logger.debug(serial_href)
                            executor.submit(self.get_episodes, serial_href)

        logger.info('%s torrents', self.torrents_count)
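
    # Query syntax handled by search() above:
    #   '@fav'     -> torrents for all favorited series
    #   '@new'     -> torrents released within the last 7 days
    #   '@new:fav' -> recent torrents from favorited series only
    # Any other query string is sent to the site's regular search.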

    def get_new(self, fav=False, days=7):
        type = 99 if fav else 0
        today = datetime.now().date()
        self.dates = {}

        with concurrent.futures.ThreadPoolExecutor() as executor:
            page_number = 1
            while True:
                url = self.new_url_pattern.format(page=page_number, type=type)
                page = self.session.request(url)
                rows = Parser(page).find_all('div', {'class': 'row'})
                if not rows:
                    break

                for row in rows:
                    release_date_str = row.find_all('div', {'class': 'alpha'})[1].text
                    release_date_str = re.search(r'\d{2}.\d{2}.\d{4}', release_date_str)[0]
                    release_date = datetime.strptime(release_date_str, self.datetime_format).date()
                    date_delta = today - release_date

                    if date_delta.days > days:
                        return

                    href = '/'.join(row.a['href'].split('/')[:3])

                    haveseen_btn = row.find('div', {'onclick': 'markEpisodeAsWatched(this);'})
                    episode_code = haveseen_btn['data-episode'].rjust(9, '0')

                    self.dates[episode_code] = release_date_str

                    executor.submit(self.get_torrents, href, episode_code, True)

                page_number += 1

    def get_fav(self):
        page = self.session.request(SITE_URL + '/my/type_1')
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for serial in Parser(page).find_all('div', {'class': 'serial-box'}):
                href = serial.find('a', {'class': 'body'})['href']
                executor.submit(self.get_episodes, href)

    def get_episodes(self, serial_href):
        self.old_seasons.setdefault(serial_href, 0)
        serial_page = self.session.request(self.serial_url_pattern.format(href=serial_href))

        with concurrent.futures.ThreadPoolExecutor() as executor:
            for button in Parser(serial_page).find_all('div', {'class': 'external-btn'}):
                item_button = button.attrs.get('onclick')
                if item_button:
                    episode_code = re.search(r'\d{7,9}', item_button)[0].rjust(9, '0')
                    logger.debug('episode_code = %s', episode_code)
                    executor.submit(self.get_torrents, serial_href, episode_code)

    def get_torrents(self, href, code, new_episodes=False):
        # code is a 9-digit string: 3 digits series id, 3 digits season, 3 digits episode
        season, episode = int(code[3:6]), int(code[6:])

        if not any((
            season > self.old_seasons.get(href, -1),
            episode == self.all_episodes,
            season == self.additional_season,
            new_episodes
        )):
            return

        redir_page = self.session.request(self.download_url_pattern.format(code=code))
        torrent_page_url = re.search(r'(?<=location.replace\(").+(?="\);)', redir_page)
        if not torrent_page_url:
            return
        torrent_page = self.session.request(torrent_page_url[0])

        date = '' if not new_episodes else '[' + self.dates.pop(code, '') + ']'
        desc_link = self.get_description_url(href, code)
        logger.debug('desc_link = %s', desc_link)

        with concurrent.futures.ThreadPoolExecutor() as executor:
            for torrent_tag in Parser(torrent_page).find_all('div', {'class': 'inner-box--item'}):
                main = torrent_tag.find('div', {'class': 'inner-box--link main'}).a
                link, name = main['href'], ' '.join((main.text.replace('\n', ' '), date))

                if not new_episodes:
                    if link in self.prevs:
                        self.old_seasons[href] = max(self.old_seasons.get(href, 0), season)
                        break
                    self.prevs.add(link)

                size, unit = re.search(
                    r'\d+.\d+ \w\w(?=\.)',
                    torrent_tag.find('div', {'class': 'inner-box--desc'}).text
                )[0].split()

                torrent_dict = {
                    'link': link,
                    'name': name,
                    'size': ' '.join((size, self.units_dict.get(unit, ''))),
                    'seeds': -1,
                    'leech': -1,
                    'engine_url': self.url,
                    'desc_link': desc_link
                }

                if ENABLE_PEERS_INFO:
                    future = executor.submit(self.get_torrent_info, torrent_dict)
                    future.add_done_callback(lambda f: self.pretty_printer(f.result()))
                else:
                    self.pretty_printer(torrent_dict)

    def get_description_url(self, href, code):
        season, episode = int(code[3:6]), int(code[6:])

        if season == self.additional_season:
            return self.additional_url_pattern.format(href=href, episode=episode)
        elif episode == self.all_episodes:
            return self.season_url_pattern.format(href=href, season=season)
        else:
            return self.episode_url_pattern.format(href=href, season=season, episode=episode)

    def get_torrent_info(self, tdict):
        response = self.session.request(tdict['link'], decode=False)
        if not response:
            return tdict

        torrent = self.decode_data(response)
        torrent_info = self.encode_obj(torrent.get(b'info'))
        if not torrent_info:
            return tdict

        # ask the torrent's tracker (announce URL) for seeder/leecher counts
        info_hash = hashlib.sha1(torrent_info).digest()
        params = {
            'peer_id': self.peer_id,
            'info_hash': info_hash,
            'port': 6881,
            'left': 0,
            'downloaded': 0,
            'uploaded': 0,
            'compact': 1
        }
        url = torrent[b'announce'].decode('utf-8') + '?' + parse.urlencode(params)

        response = self.session.request(url, decode=False)
        if response:
            data = self.decode_data(response)
            tdict['seeds'] = data.get(b'complete', 0) - 1
            tdict['leech'] = data.get(b'incomplete', -1)

        return tdict

    def decode_data(self, data):
        try:
            return bdecode(data)
        except Exception as e:
            logger.error(e)
            return dict()

    def encode_obj(self, obj):
        try:
            return bencode(obj)
        except Exception as e:
            logger.error(e)
            return b''

    def pretty_printer(self, dictionary):
        if dictionary['link'] == 'Error':
            logger.error(dictionary)
        else:
            logger.debug(dictionary)
            self.torrents_count += 1

        if self.output:
            try:
                prettyPrinter(dictionary.copy())
            except OSError as e:
                logger.error('error %s on printing %s', e, dictionary)


class Session:
    site_name = 'lostfilm'
    file_name = 'lostfilm.json'
    datetime_format = '%m-%d-%y %H:%M:%S'

    token = None
    time = None
    _error = None

    @property
    def error(self):
        return 'Error: {info}.'.format(info=self._error)

    @property
    def file_path(self):
        """path to file with session data"""
        return os.path.join(STORAGE, self.file_name)

    @property
    def is_actual(self):
        """Checks the relevance of the token"""
        if self.token and self.time and not self._error:
            delta = datetime.now() - self.time
            return delta.days < 1
        return False

    @property
    def cookies(self):
        if not self.is_actual:
            self.create_new()
        return {'lf_session': self.token}

    def __init__(self):
        self.load_data()
        if not self.is_actual:
            if self.create_new():
                self.save_data()

    def request(self, url, params=None, decode=True):
        args = [url]
        try:
            if proxy['enable'] and self.site_name in url:
                opener = request.build_opener(
                    request.ProxyBasicAuthHandler(),
                    request.ProxyHandler(proxy['proxy_urls'])
                )
                logger.info('proxy used for "%s"', url)
            else:
                opener = request.build_opener()

            # use cookies only for lostfilm site urls
            if self.site_name in url:
                if not params:
                    params = self.cookies
                else:
                    params.update(self.cookies)

            if params:
                args.append(parse.urlencode(params).encode('utf-8'))

            result = opener.open(*args).read()

            return result if not decode else result.decode('utf-8')
        except Exception as e:
            logger.error('%s url="%s" params="%s"' % (e, url, params))

    def load_data(self):
        if not os.path.exists(self.file_path):
            return

        with open(self.file_path, 'r') as file:
            result = json.load(file)
            if result.get('token') and result.get('time'):
                self.token = result['token']
                self.time = self.datetime_from_string(result['time'])
                logger.info('%s %s', self.token, self.time)

    def create_new(self):
        self._error = None

        if not (EMAIL and PASSWORD):
            self._error = 'Incorrect login data'
            logger.error(self._error)
            return False

        login_data = {
            "act": "users",
            "type": "login",
            "mail": EMAIL,
            "pass": PASSWORD,
            "need_captcha": "",
            "captcha": "",
            "rem": 1
        }

        url = SITE_URL + '/ajaxik.php?'
        params = parse.urlencode(login_data).encode('utf-8')
        cjar = CookieJar()

        if proxy['enable']:
            opener = request.build_opener(
                request.ProxyHandler(proxy['proxy_urls']),
                request.HTTPCookieProcessor(cjar)
            )
            logger.debug('proxy used')
        else:
            opener = request.build_opener(request.HTTPCookieProcessor(cjar))

        try:
            response = opener.open(url, params).read().decode('utf-8')
        except Exception as e:
            self._error = 'Connection failed'
            logger.error('%s %s', self._error, e)
            return False

        result = json.loads(response)

        if 'error' in result:
            self._error = 'Incorrect login data'
        elif 'need_captcha' in result:
            self._error = 'Captcha requested'
        else:
            for cookie in cjar:
                if cookie.name == 'lf_session':
                    self.time = datetime.now()
                    self.token = cookie.value
                    logger.info('%s %s', self.token, self.time)
                    return True
            else:
                self._error = 'Token problem'

        logger.error(self._error)
        return False

    def save_data(self):
        data = {
            "token": self.token,
            "time": None if not self.time else self.datetime_to_string(self.time)
        }
        logger.info(data)

        with open(self.file_path, 'w') as file:
            json.dump(data, file)

    def datetime_to_string(self, dt_obj):
        if isinstance(dt_obj, datetime):
            return dt_obj.strftime(self.datetime_format)
        else:
            raise TypeError('argument must be datetime, not %s' % (type(dt_obj)))

    def datetime_from_string(self, dt_string):
        if isinstance(dt_string, str):
            return datetime.strptime(dt_string, self.datetime_format)
        else:
            raise TypeError('argument must be str, not %s' % (type(dt_string)))


class Tag:
    def __init__(self, tag=None, attrs=(), is_self_closing=None):
        self.type = tag
        self.is_self_closing = is_self_closing
        self._attrs = tuple(attrs)
        self._content = tuple()

    @property
    def attrs(self):
        """returns dict of Tag's attrs"""
        return dict(self._attrs)

    @property
    def text(self):
        """returns str of all contained text"""
        return ''.join(c if isinstance(c, str) else c.text for c in self._content)

    def _add_content(self, obj):
        if isinstance(obj, (Tag, str)):
            self._content += (obj,)
        else:
            raise TypeError('Argument must be str or %s, not %s' % (self.__class__, obj.__class__))

    def find(self, tag=None, attrs=None):
        """returns Tag or None"""
        return next(self._find_all(tag, attrs), None)

    def find_all(self, tag=None, attrs=None):
        """returns list"""
        return list(self._find_all(tag, attrs))

    def _find_all(self, tag_type=None, attrs=None):
        """returns generator"""
        if not (isinstance(tag_type, (str, Tag)) or tag_type is None):
            raise TypeError('tag_type argument must be str or Tag, not %s' % (tag_type.__class__))
        if not (isinstance(attrs, dict) or attrs is None):
            raise TypeError('attrs argument must be dict, not %s' % (self.__class__))

        # get tags-descendants generator
        results = self.descendants

        # filter by Tag.type
        if tag_type:
            if isinstance(tag_type, Tag):
                tag_type, attrs = tag_type.type, (attrs if attrs else tag_type.attrs)
            results = filter(lambda t: t.type == tag_type, results)

        # filter by Tag.attrs
        if attrs:
            # remove Tags without attrs
            results = filter(lambda t: t._attrs, results)

            def filter_func(tag):
                for key in attrs.keys():
                    if attrs[key] not in tag.attrs.get(key, ()):
                        return False
                return True

            # filter by attrs
            results = filter(filter_func, results)

        yield from results

    @property
    def children(self):
        """returns generator of tags-children"""
        return (obj for obj in self._content if isinstance(obj, Tag))

    @property
    def descendants(self):
        """returns generator of tags-descendants"""
        for child_tag in self.children:
            yield child_tag
            yield from child_tag.descendants

    def __getitem__(self, key):
        return self.attrs[key]

    def __getattr__(self, attr):
        if not attr.startswith("__"):
            return self.find(tag=attr)

    def __repr__(self):
        attrs = ' '.join(str(k) if v is None else '{}="{}"'.format(k, v) for k, v in self._attrs)
        starttag = ' '.join((self.type, attrs)) if attrs else self.type

        if self.is_self_closing:
            return '<{}>\n'.format(starttag)
        else:
            nested = '\n' * bool(next(self.children, None)) + ''.join(map(str, self._content))
            return '<{}>{}</{}>\n'.format(starttag, nested, self.type)


class Parser(HTMLParser):
    def __init__(self, html_code, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._root = Tag('_root')
        self._path = [self._root]

        self.feed(''.join(map(str.strip, html_code.splitlines())))
        self.handle_endtag(self._root.type)
        self.close()

        self.find = self._root.find
        self.find_all = self._root.find_all

    @property
    def attrs(self):
        return self._root.attrs

    @property
    def text(self):
        return self._root.text

    def handle_starttag(self, tag, attrs):
        self._path.append(Tag(tag=tag, attrs=attrs))

    def handle_endtag(self, tag_type):
        # close the nearest matching open tag; everything opened after it becomes its content
        for pos, tag in tuple(enumerate(self._path))[::-1]:
            if isinstance(tag, Tag) and tag.type == tag_type and tag.is_self_closing is None:
                tag.is_self_closing = False

                for obj in self._path[pos + 1:]:
                    if isinstance(obj, Tag) and obj.is_self_closing is None:
                        obj.is_self_closing = True
                    tag._add_content(obj)

                self._path = self._path[:pos + 1]
                break

    def handle_startendtag(self, tag, attrs):
        self._path.append(Tag(tag=tag, attrs=attrs, is_self_closing=True))

    def handle_decl(self, decl):
        self._path.append(Tag(tag='!' + decl, is_self_closing=True))

    def handle_data(self, text):
        self._path.append(text)

    def __getitem__(self, key):
        return self.attrs[key]

    def __getattr__(self, attr):
        if not attr.startswith("__"):
            return getattr(self._root, attr)

    def __repr__(self):
        return ''.join(str(c) for c in self._root._content)
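
# A small illustration of the Parser/Tag helpers (the HTML snippet is made up;
# attribute matching is by substring, as used throughout the lostfilm class):
#   doc = Parser('<div class="row-search"><a href="/series/x">Title</a></div>')
#   row = doc.find('div', {'class': 'row-search'})
#   row.a['href']  -> '/series/x'
#   row.text       -> 'Title'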


def bencode(value):
    if isinstance(value, dict):
        return b'd%be' % b''.join([bencode(k) + bencode(v) for k, v in value.items()])
    if isinstance(value, list) or isinstance(value, tuple):
        return b'l%be' % b''.join([bencode(v) for v in value])
    if isinstance(value, int):
        return b'i%ie' % value
    if isinstance(value, bytes):
        return b'%i:%b' % (len(value), value)
    raise ValueError("Only int, bytes, list, tuple or dict can be encoded, got %s" % type(value).__name__)
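
# For reference, the encoding produced above:
#   bencode({b'a': 1})      -> b'd1:ai1ee'
#   bencode([b'spam', 42])  -> b'l4:spami42ee'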


def bdecode(data):
    class InvalidBencode(Exception):
        @classmethod
        def at_position(cls, error, position):
            logger.error("%s at position %i" % (error, position))
            return cls("%s at position %i" % (error, position))

        @classmethod
        def eof(cls):
            logger.error("EOF reached while parsing")
            return cls("EOF reached while parsing")

    def decode_from_io(f):
        char = f.read(1)

        if char == b'd':
            dict_ = OrderedDict()
            while True:
                position = f.tell()
                char = f.read(1)
                if char == b'e':
                    return dict_
                if char == b'':
                    raise InvalidBencode.eof()
                f.seek(position)
                key = decode_from_io(f)
                dict_[key] = decode_from_io(f)

        if char == b'l':
            list_ = []
            while True:
                position = f.tell()
                char = f.read(1)
                if char == b'e':
                    return list_
                if char == b'':
                    raise InvalidBencode.eof()
                f.seek(position)
                list_.append(decode_from_io(f))

        if char == b'i':
            digits = b''
            while True:
                char = f.read(1)
                if char == b'e':
                    break
                if char == b'':
                    raise InvalidBencode.eof()
                if not char.isdigit():
                    raise InvalidBencode.at_position('Expected int, got %s' % str(char), f.tell())
                digits += char
            return int(digits)

        if char.isdigit():
            digits = char
            while True:
                char = f.read(1)
                if char == b':':
                    break
                if char == b'':
                    raise InvalidBencode
                digits += char
            length = int(digits)
            string = f.read(length)
            return string

        raise InvalidBencode.at_position('Unknown type : %s' % char, f.tell())

    return decode_from_io(BytesIO(data))
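
# Round-trip example for the helpers above, matching the shape that
# get_torrent_info() expects back from a tracker:
#   bdecode(b'd8:completei5e10:incompletei2ee')
#   -> OrderedDict([(b'complete', 5), (b'incomplete', 2)])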


if __name__ == '__main__':
    import sys

    if 1 < len(sys.argv) < 4:
        if len(sys.argv) == 3:
            if sys.argv[1] == '-d':
                logger.setLevel(logging.DEBUG)
            else:
                print('%s [-d] "search query"' % (__file__))
                exit()
        else:
            logger.setLevel(logging.INFO)

        lostfilm(True).search(sys.argv[-1])
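
# Example invocations when run directly (requires qBittorrent's novaprinter
# helper on the import path; the queries below are only illustrations):
#   python lostfilm.py "breaking bad"    # regular site search, INFO logging
#   python lostfilm.py -d "@new:fav"     # DEBUG logging, recent favorite releases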