initial commit

This commit is contained in:
Evgeny Zinoviev 2021-01-03 21:41:50 +03:00
commit d05ab59b47
11 changed files with 646 additions and 0 deletions

12
.gitignore vendored Normal file
View File

@ -0,0 +1,12 @@
*.pyc
__pycache__/
instance/
.pytest_cache/
.coverage
htmlcov/
dist/
build/
*.egg-info/

91
app/__init__.py Normal file
View File

@ -0,0 +1,91 @@
import os
import time
from . import acmespb
from flask import Flask, render_template
from flask_socketio import SocketIO, emit
# Module-level Socket.IO server object; create_app() binds it to the Flask
# application through socketio.init_app(app).
socketio = SocketIO()
def create_app(test_config=None):
    """Build the Flask application and register its HTTP and Socket.IO handlers.

    Args:
        test_config: Optional mapping of configuration values. When given it
            is applied instead of the instance ``config.py`` file.

    Returns:
        The configured Flask application. The module-level ``socketio``
        object is bound to it via ``init_app``.
    """
    app = Flask(__name__, instance_relative_config=True)
    app.config.from_mapping(
        SECRET_KEY='dev',
        DATABASE=os.path.join(app.instance_path, 'app.sqlite'),
    )

    if test_config is None:
        # load the instance config, if it exists, when not testing
        app.config.from_pyfile('config.py', silent=True)
    else:
        # load the test config if passed in
        app.config.from_mapping(test_config)

    # Ensure the instance folder exists. An already existing folder is not an
    # error; any other failure is still non-fatal but is no longer silent.
    try:
        os.makedirs(app.instance_path, exist_ok=True)
    except OSError as exc:
        app.logger.warning('cannot create instance folder %s: %s',
                           app.instance_path, exc)

    socketio.init_app(app)

    @app.route('/')
    def hello():
        """Render the index page."""
        return render_template('index.html')

    @socketio.on('get_hints')
    def handle_get_hints_event(q):
        """Answer a 'get_hints' request with a 'hints' event.

        ``q`` is read as a mapping with the keys ``id`` and ``query``; the id
        is echoed back in the response.
        """
        print('[get_hints] id=%d, query=%s' % (q['id'], q['query']))

        if len(q['query']) < 3:
            emit('hints', {
                'id': q['id'],
                'error': "query is too short"
            })
            return

        results = acmespb.search(q['query'])
        emit('hints', {
            'id': q['id'],
            'response': results
        })

    @socketio.on('get_offers')
    def handle_get_offers_event(q):
        """Stream every page of offers for ``q['query']`` as 'offers' events.

        When the query resolves to a list of trade names, that list is sent
        as a single 'hints' event instead and no offers are fetched.
        """
        print('[get_offers] id=%d, query=%s' % (q['id'], q['query']))

        target_url, trade_names = acmespb.trade_names(q['query'])
        if trade_names:
            emit('hints', {
                'id': q['id'],
                "response": trade_names
            })
            return

        # target_url from trade_names() is reused on purpose: it is the same
        # redirect target offers() would otherwise have to request again.
        page = 1
        pages = 0  # unknown until the first page has been fetched
        try:
            while pages == 0 or page <= pages:
                target_url, pages, offers = acmespb.offers(
                    q['query'], page=page, target_url=target_url)
                print("[%d] pages=%d, target_url=%s" % (page, pages, target_url))

                emit('offers', {
                    'id': q['id'],
                    'offers': [offer.as_dict() for offer in offers],
                    'page': page,
                    'pages': pages
                })

                time.sleep(0.5)
                page += 1
        finally:
            # Always close the stream, even when fetching a page failed, so
            # the client is told that no more 'offers' events will follow.
            emit('offers', {
                'id': q['id'],
                'end': True
            })
        # TODO empty response

    return app

133
app/acmespb.py Normal file
View File

@ -0,0 +1,133 @@
import requests
import urllib.parse
import json
import re
import math
import hashlib
from bs4 import BeautifulSoup
# Headers attached to every request made through ``session``.
headers = {
    'Referer': 'https://www.acmespb.ru/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.89 Safari/537.36',
}

# Both schemes go through the same SOCKS5 proxy on the local machine.
proxies = {scheme: 'socks5://127.0.0.1:1079' for scheme in ('http', 'https')}

# Page size used to turn a total match count into a page count.
per_page = 50

# Shared HTTP session used by every function in this module.
session = requests.Session()
session.headers.update(headers)
session.proxies.update(proxies)
class AcmeException(Exception):
    """Error raised by this module when the remote site answers unexpectedly."""
class AcmePharmacy:
    """A pharmacy: display name, address, phone number and geo coordinates."""

    def __init__(self, name='', address='', phone='', geo=None):
        self.name = name
        self.address = address
        self.phone = phone
        self.geo = geo

    def as_dict(self):
        """Return the pharmacy as a new dict with an extra ``hash`` key.

        ``hash`` is the MD5 hex digest of ``"<address>|<name>"`` (UTF-8), so
        two pharmacies with the same address and name share the same hash.

        A copy of the instance attributes is returned; the instance itself is
        left untouched, so no ``hash`` attribute appears on the object.
        """
        data = dict(self.__dict__)
        identity = "%s|%s" % (self.address, self.name)
        data['hash'] = hashlib.md5(identity.encode('utf-8')).hexdigest()
        return data
class AcmeOffer:
    """A single offer: product name, country, price and the selling pharmacy."""

    def __init__(self, name='', country='', pharmacy=None, price=None):
        self.name = name
        self.country = country
        self.pharmacy = pharmacy
        self.price = price

    def as_dict(self):
        """Return the offer as a new dict with the pharmacy converted as well.

        ``pharmacy`` is replaced by the result of its own ``as_dict()`` (or
        left as ``None`` when no pharmacy is set). The instance is not
        modified, so the method can be called any number of times.
        """
        data = dict(self.__dict__)
        data['pharmacy'] = None if self.pharmacy is None else self.pharmacy.as_dict()
        return data
def search(query):
    """Fetch autocomplete suggestions for *query*.

    Returns the JSON-decoded response body, or an empty list when the body
    is empty.
    """
    endpoint = "https://www.acmespb.ru/lib/autocomplete.php?term="
    resp = session.get(endpoint + urllib.parse.quote(query), allow_redirects=False)
    if resp.text == "":
        return []

    resp.encoding = "utf-8"
    return json.loads(resp.text)
def trade_names(query):
    """Submit *query* to the site search and list the matching trade names.

    Returns:
        A ``(location, names)`` tuple. When the search redirect does not
        point at a ``/trade/`` page, ``location`` is the redirect target and
        ``names`` is ``None``. Otherwise ``location`` is ``None`` and
        ``names`` holds the text of every ``<option>`` inside the element
        with id ``trades``, except the one whose value is ``all``.

    Raises:
        AcmeException: if the search does not answer with HTTP 301, or the
            trade page contains no element with id ``trades``.
    """
    url = "https://www.acmespb.ru/search.php"
    r = session.post(url, {"free_str": query}, allow_redirects=False)
    if r.status_code != 301:
        raise AcmeException("status_code is %d" % (r.status_code,))

    location = r.headers["location"]
    if '/trade/' not in location:
        return location, None

    r = session.get(location, allow_redirects=False)
    r.encoding = "utf-8"

    soup = BeautifulSoup(r.text, "html.parser")
    trades = soup.find(id="trades")
    if trades is None:
        # Fail with a descriptive error instead of an AttributeError on None.
        raise AcmeException("trade page has no element with id 'trades'")

    # .get() tolerates <option> tags that carry no value attribute.
    return None, [opt.string for opt in trades.find_all("option")
                  if opt.get("value") != "all"]
def _get_location(query):
    """POST the search form for *query* and return the response's Location header."""
    resp = session.post(
        "https://www.acmespb.ru/search.php",
        {"free_str": query},
        allow_redirects=False,
    )
    return resp.headers["location"]
# Raw strings: "\." inside a normal string literal is an invalid escape sequence.
_PHONE_RE = re.compile(r'тел\.([^<]+)')
_GEO_RE = re.compile(r'text=([0-9\.]+),([0-9\.]+)')


def _parse_offer_row(trow):
    """Build an AcmeOffer (with its AcmePharmacy) from one result row element."""
    name = trow.select_one('.cell.name p.sra').text
    country = trow.select_one('.cell.country').text
    phname = trow.select_one('.cell.pharm a').text
    price = float(trow.select_one('.cell.pricefull').text)

    # parse address, geo coordinates and phone number
    addr_div = trow.select_one('.cell.address')
    phone_match = _PHONE_RE.search(addr_div.text)
    # A row without a phone number is still a usable offer.
    phone = phone_match.group(1).strip() if phone_match else ''

    addr_link = addr_div.select_one('a')
    address = addr_link.text
    # Coordinates are taken from the "text=<a>,<b>" part of the link; a link
    # without them still raises IndexError, as before.
    geo = [float(part) for part in _GEO_RE.findall(addr_link['href'])[0]]

    acmepharm = AcmePharmacy(name=phname, address=address, phone=phone, geo=geo)
    return AcmeOffer(name=name, country=country, price=price, pharmacy=acmepharm)


def offers(query, target_url=None, page=1):
    """Fetch one page of offers for *query*.

    Args:
        query: Search string, sent as the ``free_str`` form field.
        target_url: URL to post to. When ``None`` it is resolved with
            ``_get_location(query)``; pass the returned value back in on
            later calls to skip that extra request.
        page: Page number, sent as the ``page`` form field.

    Returns:
        A ``(target_url, pages, offers)`` tuple: the URL that was used, the
        total number of pages (always at least 1) and the list of
        ``AcmeOffer`` objects found on this page.

    Raises:
        AcmeException: if the response status is not 200.
    """
    if target_url is None:
        target_url = _get_location(query)

    data = {
        "free_str": query,
        "page": page
    }
    r = session.post(target_url, data, allow_redirects=False)
    r.encoding = "utf-8"
    if r.status_code != 200:
        raise AcmeException("status_code is %d, expected 200" % (r.status_code,))

    soup = BeautifulSoup(r.text, "html.parser")

    pages = 1
    p = soup.find("p", class_="red")
    if p is not None:
        # get_text() also works when the paragraph contains nested tags,
        # where .string would be None.
        total = re.search(r"[0-9]+", p.get_text())
        if total:
            # Never report 0 pages: callers use 0 as "not known yet" and
            # would keep requesting pages forever.
            pages = max(1, math.ceil(int(total.group(0)) / per_page))

    found = []
    for trow in soup.find_all('div', class_='trow'):
        if 'thead' in trow['class']:
            continue  # header row, carries no offer
        found.append(_parse_offer_row(trow))

    return target_url, pages, found

191
app/static/app.js Normal file
View File

@ -0,0 +1,191 @@
/**
 * Controller for the search form: connects the query input, the submit
 * button, the autocomplete dropdown and the Socket.IO connection.
 */
class Search {
    constructor() {
        // Debounced so a burst of keystrokes results in one 'get_hints' request.
        this.searchDebounced = _.debounce((query) => {
            if (query.length < 3)
                return;

            this.socket.emit('get_hints', {
                id: this.updateRequestId(),
                query
            });
        }, 150);

        let field = document.getElementById('queryInput');
        let btn = document.getElementById('querySubmit');

        this.autoComplete = new Autocomplete(field, {
            data: [],
            maximumItems: 10,
            onInput: (value) => {
                this.searchDebounced(value);
            },
            onSelectItem: ({label}) => {
                // console.log('selected:', label)
            },
            highlightClass: 'text-danger'
        });

        btn.addEventListener('click', this.onSubmit);
        field.addEventListener('keydown', this.onInputKeyDown);

        this.btn = btn;
        this.field = field;

        this.socket = io();
        this.socket.on('hints', this.onHints);
        this.socket.on('offers', this.onOffers);
    }

    /**
     * Generate a new request id and remember it as the current one.
     * Responses carrying any other id are ignored by the handlers below.
     */
    updateRequestId() {
        this.requestId = requestId();
        return this.requestId;
    }

    /** Submit the search when Enter is pressed in the query field. */
    onInputKeyDown = (e) => {
        // e.key is the current API; keyCode is kept as a fallback.
        if (e.key === 'Enter' || e.keyCode === 10 || e.keyCode === 13)
            this.onSubmit();
    }

    /** Lock the button, clear the map and request offers for the typed query. */
    onSubmit = (e) => {
        if (this.isLocked())
            return;

        // Nothing to search for: do not lock the button or hit the server.
        if (this.field.value.trim() === '')
            return;

        this.lockButton('Загрузка...');
        gMaps.removeAllPoints();

        this.socket.emit('get_offers', {
            id: this.updateRequestId(),
            query: this.field.value
        });
    }

    /** Handle a 'hints' event: feed the suggestions to the autocomplete. */
    onHints = (data) => {
        if (data.id !== this.requestId)
            return;

        // A 'hints' event can also be the answer to 'get_offers', which
        // locked the button, so it is released here.
        this.unlockButton();

        if (data.error) {
            console.warn(data.error);
            return;
        }

        this.autoComplete.setData(data.response.map(item => {
            return {label: item, value: ''};
        }));
        this.autoComplete.renderIfNeeded();
    }

    /** Handle an 'offers' event: show progress and add the offers to the map. */
    onOffers = (data) => {
        if (data.id !== this.requestId)
            return;

        if (data.end) {
            this.unlockButton();
            return;
        } else {
            // Show "page из pages" only when there is more than one page.
            this.lockButton(data.pages > 1 ? `${data.page} из ${data.pages}` : null);
        }

        for (let offer of data.offers)
            gMaps.addOffer(offer);
    }

    /** @returns {boolean} whether the submit button is currently disabled */
    isLocked() {
        return this.btn.classList.contains('disabled');
    }

    /**
     * Disable the submit button.
     * @param {?string} text new button caption; null keeps the current one
     */
    lockButton(text) {
        if (text !== null)
            this.btn.innerText = text;

        this.btn.classList.add('disabled');
    }

    /** Enable the submit button again and restore its default caption. */
    unlockButton() {
        this.btn.classList.remove('disabled');
        this.btn.innerText = 'Поиск';
    }
}
/**
 * Wrapper around the Yandex map: one placemark per pharmacy, each listing
 * the offers available there.
 */
class Maps {
    constructor() {
        /**
         * @type {ymaps.Map}
         */
        this.map = null;

        // Pharmacy hash -> {offers: [...], mark: placemark}
        this.places = {};

        ymaps.ready(this.onInit);
    }

    /**
     * Escape a value for safe insertion into HTML.
     * @param {*} value
     * @returns {string}
     */
    static escapeHtml(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /** Create the map once the ymaps API reports it is ready. */
    onInit = () => {
        this.map = new ymaps.Map("mapContainer", {
            center: [59.94, 30.32],
            zoom: 11
        });

        this.map.controls.remove('searchControl');
    }

    /**
     * Add a placemark for a pharmacy.
     *
     * The balloon content is built when the balloon opens, from the
     * current contents of offersRef, so offers pushed later are included.
     *
     * @returns the created placemark
     */
    addPoint({geo, offersRef, hint, pharmacyName, pharmacyAddress, pharmacyPhone}) {
        let mark = new ymaps.Placemark(geo, {
            hintContent: hint,
        }, {
            preset: 'islands#dotIcon',
            openEmptyBalloon: true,
            iconColor: '#3caa3c'
        });

        mark.events.add('balloonopen', e => {
            // All values come from a scraped third-party page: escape them.
            const esc = Maps.escapeHtml;

            let lines = offersRef.map(offer => {
                return `${esc(offer.name)} (${esc(offer.price)} руб.)`
            });

            let html = `<b>${esc(pharmacyName)}</b><br>`;
            html += `${esc(pharmacyAddress)}<br>`;
            html += `тел: ${esc(pharmacyPhone)}<br><br>`;
            // '\n' collapses to a space in HTML; <br> keeps one offer per line.
            html += lines.join('<br>');

            mark.properties.set('balloonContent', html);
        });

        this.map.geoObjects.add(mark);
        return mark;
    }

    /** Remove every placemark and forget which pharmacies were shown. */
    removeAllPoints() {
        // Without resetting the index, a pharmacy seen in an earlier search
        // would be treated as already on the map and get no new placemark.
        this.places = {};

        // The map does not exist until ymaps is ready.
        if (this.map !== null)
            this.map.geoObjects.removeAll();
    }

    /**
     * Register an offer: append it to its pharmacy's list, creating the
     * pharmacy's placemark the first time the pharmacy is seen.
     */
    addOffer(offer) {
        // console.log('[addOffer]', offer);
        let hash = offer.pharmacy.hash;

        if (hash in this.places)
            this.places[hash].offers.push(offer);
        else {
            this.places[hash] = {
                offers: [offer],
            };
            this.places[hash].mark = this.addPoint({
                geo: offer.pharmacy.geo,
                hint: offer.pharmacy.name,
                pharmacyName: offer.pharmacy.name,
                pharmacyAddress: offer.pharmacy.address,
                pharmacyPhone: offer.pharmacy.phone,
                offersRef: this.places[hash].offers
            });
        }
    }
}
/**
 * Pick a random id used to match a socket response to its request.
 * @returns {number} result of _.random(1, 99999999)
 */
function requestId() {
    const lowest = 1;
    const highest = 99999999;
    return _.random(lowest, highest);
}
// Page-wide instances, created once the DOM has been parsed.
let gMaps, gSearch;

window.addEventListener('DOMContentLoaded', () => {
    gSearch = new Search();
    gMaps = new Maps();
});

135
app/static/autocomplete.js Normal file
View File

@ -0,0 +1,135 @@
// Default Autocomplete options; values passed to the constructor override them.
const DEFAULTS = {
// Minimum input length before suggestions are built. The key is spelled
// "treshold" and is read under that name in createItems().
treshold: 2,
// Upper bound on the number of suggestions rendered; the bound is only
// applied when the value is greater than 0.
maximumItems: 5,
// Whether the typed fragment is wrapped in a highlighting <span>.
highlightTyped: true,
// CSS class (string or array of strings) put on that <span>.
highlightClass: 'text-primary',
};
/**
 * Bootstrap-dropdown based autocomplete attached to a text input.
 *
 * Suggestions are taken from options.data (objects with `label` and
 * `value`) and filtered by a case-insensitive substring match against the
 * current field value.
 */
class Autocomplete {
    /**
     * @param field the text input to attach to
     * @param options overrides for DEFAULTS, plus optional data,
     *   dropdownClass, dropdownOptions, onInput(value) and
     *   onSelectItem({value, label})
     */
    constructor(field, options) {
        this.field = field;
        this.options = Object.assign({}, DEFAULTS, options);
        this.dropdown = null;

        field.parentNode.classList.add('dropdown');
        field.setAttribute('data-toggle', 'dropdown');
        field.classList.add('dropdown-toggle');

        const dropdown = ce(`<div class="dropdown-menu" ></div>`);
        if (this.options.dropdownClass)
            dropdown.classList.add(this.options.dropdownClass);

        insertAfter(dropdown, field);

        // Keep a direct reference instead of relying on field.nextSibling.
        this.menu = dropdown;

        this.dropdown = new bootstrap.Dropdown(field, this.options.dropdownOptions);

        field.addEventListener('click', (e) => {
            if (this.createItems() === 0) {
                // prevent show empty
                e.stopPropagation();
                this.dropdown.hide();
            }
        });

        field.addEventListener('input', () => {
            if (this.options.onInput)
                this.options.onInput(this.field.value);
            this.renderIfNeeded();
        });

        field.addEventListener('keydown', (e) => {
            // Escape closes the dropdown (keyCode kept as a fallback).
            if (e.key === 'Escape' || e.keyCode === 27)
                this.dropdown.hide();
        });
    }

    /** Replace the list of suggestion candidates. */
    setData(data) {
        this.options.data = data;
    }

    /** Rebuild the suggestion list and show the dropdown if it is not empty. */
    renderIfNeeded() {
        if (this.createItems() > 0) {
            this.dropdown.show();
        } else {
            // sets up positioning
            this.field.click();
        }
    }

    /**
     * Build the dropdown button for one suggestion.
     *
     * The element is assembled through the DOM API (textContent /
     * setAttribute), so label and value are never parsed as HTML.
     *
     * @param lookup the text currently typed into the field
     * @param item suggestion with `label` and `value`
     * @returns {HTMLButtonElement}
     */
    createItem(lookup, item) {
        const button = document.createElement('button');
        button.type = 'button';
        button.className = 'dropdown-item';
        button.setAttribute('data-value', item.value);

        const idx = this.options.highlightTyped
            ? item.label.toLowerCase().indexOf(lookup.toLowerCase())
            : -1;

        if (idx < 0) {
            button.textContent = item.label;
            return button;
        }

        const className = Array.isArray(this.options.highlightClass) ? this.options.highlightClass.join(' ')
            : (typeof this.options.highlightClass == 'string' ? this.options.highlightClass : '');

        const typed = document.createElement('span');
        typed.className = className;
        typed.textContent = item.label.substring(idx, idx + lookup.length);

        button.append(
            item.label.substring(0, idx),
            typed,
            item.label.substring(idx + lookup.length, item.label.length)
        );
        return button;
    }

    /**
     * Rebuild the dropdown content for the current field value.
     *
     * @returns {number} number of suggestions rendered; 0 when the input is
     *   shorter than the threshold (the dropdown is hidden in that case)
     */
    createItems() {
        const lookup = this.field.value;
        if (lookup.length < this.options.treshold) {
            this.dropdown.hide();
            return 0;
        }

        const items = this.menu;
        items.innerHTML = '';

        const onItemClick = (e) => {
            // currentTarget is always the button, even when the click
            // landed on the highlighted <span> inside it.
            const chosen = e.currentTarget;
            const dataValue = chosen.getAttribute('data-value');

            this.field.value = chosen.innerText;
            if (this.options.onSelectItem) {
                this.options.onSelectItem({
                    value: dataValue,
                    label: chosen.innerText,
                });
            }
            this.dropdown.hide();
        };

        let count = 0;
        for (let i = 0; i < this.options.data.length; i++) {
            const {label, value} = this.options.data[i];
            const item = {label, value};
            if (item.label.toLowerCase().indexOf(lookup.toLowerCase()) >= 0) {
                const element = this.createItem(lookup, item);
                element.addEventListener('click', onItemClick);
                items.appendChild(element);
                if (this.options.maximumItems > 0 && ++count >= this.options.maximumItems)
                    break;
            }
        }

        return items.childNodes.length;
    }
}
/**
 * Parse an HTML string inside a detached <div> and hand back the first
 * node it produced.
 *
 * @param html markup to parse
 * @returns {Node} first child of the temporary wrapper
 */
function ce(html) {
    const wrapper = document.createElement('div');
    wrapper.innerHTML = html;
    return wrapper.firstChild;
}
/**
 * Insert a node directly after a reference node, under the same parent.
 *
 * @param elem node to insert
 * @param refElem node after which elem is placed
 * @returns {*} whatever parentNode.insertBefore returns
 */
function insertAfter(elem, refElem) {
    const parent = refElem.parentNode;
    const following = refElem.nextSibling;
    return parent.insertBefore(elem, following);
}

17
app/static/style.css Normal file
View File

@ -0,0 +1,17 @@
/*.acme-container {*/
/* margin-top: 1.5rem;*/
/*}*/
/* Badge pinned to the top-right corner of its positioning context:
   white text on red, mostly transparent until hovered. */
#test {
position: absolute;
color: #fff;
background-color: red;
opacity: 0.25;
top: 0;
right: 0;
padding: 5px 8px;
cursor: pointer;
}
/* Fully opaque while the pointer is over the badge. */
#test:hover {
opacity: 1;
}

39
app/templates/base.html Normal file
View File

@ -0,0 +1,39 @@
<!doctype html>
<html lang="ru" class="h-100 mh-100">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Third-party assets: Bootstrap, Socket.IO client, lodash and the Yandex Maps API -->
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-giJF6kkoqNQ00vy+HMDP7azOuL0xtbfIcaT9wjKHr8RbDVddVHyTfAAsrekwKmP1" crossorigin="anonymous">
<link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta1/dist/js/bootstrap.bundle.min.js" integrity="sha384-ygbV9kiqUc6oa4msXn9868pTtWMgiQaeYH7/t7LECLbyPA2x65Kgf80OJFdroafW" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/3.0.4/socket.io.js" integrity="sha512-aMGMvNYu8Ue4G+fHa359jcPb1u+ytAF+P2SCb+PxrjCdO3n3ZTxJ30zuH39rimUggmTwmh2u7wvQsDTHESnmfQ==" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.20/lodash.min.js" integrity="sha512-90vH1Z83AJY9DmlWa8WkjkV79yfS2n2Oxhsi2dZbIv0nC4E6m5AbH8Nh156kkM7JePmqD6tcZsfad1ueoaovww==" crossorigin="anonymous"></script>
<script src="https://api-maps.yandex.ru/2.1/?apikey=ce936229-3ef4-41b1-96c0-270bcf8ff341&lang=ru_RU" type="text/javascript"></script>
<!-- Application scripts: autocomplete.js defines Autocomplete, which app.js instantiates -->
<script src="{{ url_for('static', filename='autocomplete.js') }}"></script>
<script src="{{ url_for('static', filename='app.js') }}"></script>
<title>{% block title %}{% endblock %}</title>
</head>
<body class="h-100 mh-100">
<div class="container h-100 pt-4 pb-4">
<div class="h-100 d-flex flex-column bd-highlight">
<div>
<!-- onsubmit returns false, so the form itself never submits; app.js listens on #querySubmit and #queryInput -->
<form class="mb-4" onsubmit="return false">
<div class="input-group">
<input type="text" class="form-control" id="queryInput" placeholder="Введите название препарата" autocomplete="off">
<button type="submit" class="btn btn-outline-primary" id="querySubmit">Поиск</button>
</div>
</form>
</div>
<!-- app.js creates the Yandex map inside this element (id "mapContainer") -->
<div class="flex-grow-1" id="mapContainer">
<!-- maps -->
</div>
</div>
</div>
<!-- Styled by the #test rule in style.css; the click handler for it in app.js is commented out -->
<div id="test">test</div>
</body>
</html>

2
app/templates/index.html Normal file
View File

@ -0,0 +1,2 @@
{% extends "base.html" %}

15
app/test.py Normal file
View File

@ -0,0 +1,15 @@
import acmespb
import sys
from pprint import pprint
def _dump_all_offers(query):
    """Walk through every offers page for *query* and print each offer as a dict."""
    target_url = None
    page, pages = 1, 0
    # pages stays 0 until the first response reports the real page count
    while pages == 0 or page <= pages:
        target_url, pages, offers = acmespb.offers(query, page=page, target_url=target_url)
        print("[%d] pages=%d, target_url=%s" % (page, pages, target_url))
        for offer in offers:
            print(offer.as_dict())
        page += 1


if __name__ == "__main__":
    _dump_all_offers("Верошпирон")

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
requests~=2.25.1
requests[socks]
beautifulsoup4~=4.9.3
Flask~=1.1.2

7
server.py Normal file
View File

@ -0,0 +1,7 @@
#!/usr/bin/env python
# Entry point: builds the application and serves it through Socket.IO.
# The shebang uses /usr/bin/env, the conventional location of env.
from app import create_app, socketio

# Application object, built once when this module is imported.
app = create_app()

if __name__ == '__main__':
    # Started through socketio.run() rather than app.run().
    socketio.run(app)