Source code for domiporta.utils

#!/usr/bin/python
# -*- coding: utf-8 -*-

import logging

import requests
from scrapper_helpers.utils import caching, get_random_user_agent, key_md5, replace_all, finder

from . import BASE_URL

log = logging.getLogger(__file__)
POLISH_CHARACTERS_MAPPING = {"ą": "a", "ć": "c", "ę": "e", "ł": "l", "ń": "n", "ó": "o", "ś": "s", "ż": "z", "ź": "z"}


[docs]def encode_text_to_html(text): """ Change text to lower cases, gets rid of polish characters replacing them with simplified version, replaces spaces with dashes :param text: text to encode :type text: str :return: encoded text which can be used in url :rtype: str """ replace_dict = POLISH_CHARACTERS_MAPPING replace_dict.update({' ': '-'}) return replace_all(text.lower(), replace_dict)
@finder(many=False, class_='pagination__input') def get_max_number_page(item, *args, **kwargs): """ Parse number of pages for search result :param item: Tag html found by finder in html markup :return: number of pages with available offers in search result :rtype: int """ return int(item.attrs.get('max'))
[docs]def get_url(category='nieruchomosci', transaction_type='wszystkie', voivodeship=None, city=None, street=None, filters=None): """ Create url to Domiporta search web page with given parameters and filters :param category: Type of property of interest (Mieszkanie/Dom/Garaż/Działka) :param transaction_type: Type of transaction :param voivodeship: Voivodeship :param city: City :param street: Street :param filters: Dictionary with additional filters :type category:str, None :type transaction_type: str, None :type voivodeship: str, None :type city: str, None :type street: str, None :type filters: dict, None :return: Url to Domiporta search web page :rtype: str """ url = BASE_URL + encode_text_to_html(category) + "/" + encode_text_to_html(transaction_type) if voivodeship: url += "/" + encode_text_to_html(voivodeship) if city: url += "/" + encode_text_to_html(city) if street: url += "/" + encode_text_to_html(street) if filters and len(filters) > 0: for i, key in enumerate(sorted(filters.keys())): if i == 0: url += "?" else: url += "&" url += "{0}={1}".format(key, filters[key]) return url
@caching(key_func=key_md5)
[docs]def get_content_from_source(url): """ Connects with given url If environmental variable DEBUG is True it will cache response for url in /var/temp directory :param url: Website url :type url: str :return: Response for requested url """ response = requests.get(url, headers={'User-Agent': get_random_user_agent()}) try: response.raise_for_status() except requests.HTTPError as e: log.warning('Request for {0} failed. Error: {1}'.format(url, e)) return None return response.content