--reuse option

This commit is contained in:
Evgeny Zinoviev 2022-03-15 00:16:55 +03:00
parent a20b9c9b07
commit 643942f57a
2 changed files with 109 additions and 90 deletions

View File

@ -4,77 +4,90 @@ const cookiesStorage = require("./cookies-storage");
puppeteer.use(StealthPlugin());
// Options object handed to puppeteer.launch(): headless by default, with no
// extra Chrome command-line switches until the launch code adds some.
const options = {
const chromeOptions = {
headless: true,
args: []
};
// Module-level state.
// browser is assigned by launch() from puppeteer.launch().
let browser = null
let page = null
// cdpClient is (re)assigned whenever getPage() creates a page and opens a
// CDP session on it.
let cdpClient = null
let interceptCallback = null
let interceptionNeededCallback = null
module.exports = {
async launch() {
browser = await puppeteer.launch(options)
},
/**
 * Wraps one puppeteer page together with the two interception callbacks
 * that are currently attached to it.
 *
 * getPage() creates the page on first use via browser.newPage(), loads the
 * cookies returned by cookiesStorage.get() into it, saves the page's cookies
 * back through cookiesStorage.save() from a page event handler, and
 * intercepts Document responses through a CDP session.
 *
 * NOTE(review): this span comes from a rendered commit diff, so lines of the
 * previous module-level getPage()/setProxy()/disableSandbox()/setHeadful()
 * appear interleaved with the class body below.
 */
class PageWrapper {
constructor() {
// Callbacks are replaced on every getPage() call.
this.intrNeededCallback = null
this.intrCallback = null
async getPage(_interceptionNeededCallback, _interceptCallback) {
if (page && page.isClosed()) {
page.removeAllListeners && page.removeAllListeners()
page = null
// No page exists until the first getPage() call.
this.page = null
}
/**
 * Return the wrapped page, creating and configuring it if necessary.
 *
 * @param interceptionNeededCallback - called with each CDP
 *   Network.requestIntercepted event; the response is captured only when
 *   it returns exactly `true`.
 * @param interceptCallback - called with the result of
 *   Network.getResponseBodyForInterception and the event's responseHeaders
 *   for every captured response.
 * @returns the puppeteer page held by this wrapper.
 */
async getPage(interceptionNeededCallback, interceptCallback) {
// Store the callbacks first so that an already existing page picks up the
// new ones: the CDP handler reads them from `this` at event time.
this.intrCallback = interceptCallback
this.intrNeededCallback = interceptionNeededCallback
// A page that was closed in the meantime is dropped and recreated below.
if (this.page !== null && this.page.isClosed()) {
this.page.removeAllListeners && this.page.removeAllListeners()
this.page = null
}
interceptionNeededCallback = _interceptionNeededCallback
interceptCallback = _interceptCallback
// Reuse the existing page; the listeners and CDP session were set up when
// it was created.
if (this.page !== null)
return this.page
if (!page) {
page = await browser.newPage()
page.on('framenavigated', async () => {
let cookies = await page.cookies();
this.page = await browser.newPage()
// Persist cookies on every 'domcontentloaded' event; failures are logged
// with console.warn and not propagated.
this.page.on('domcontentloaded', async () => {
try {
let cookies = await this.page.cookies();
if (cookies)
await cookiesStorage.save(cookies)
})
} catch (e) {
console.warn(e)
}
})
await page.setCookie(...(await cookiesStorage.get()))
// Restore previously stored cookies into the new page.
await this.page.setCookie(...(await cookiesStorage.get()))
cdpClient = await page.target().createCDPSession();
await cdpClient.send('Network.setRequestInterception', {
patterns: [{
urlPattern: '*',
resourceType: 'Document',
interceptionStage: 'HeadersReceived'
}],
})
await cdpClient.on('Network.requestIntercepted', async e => {
let obj = { interceptionId: e.interceptionId }
if (interceptionNeededCallback && interceptionNeededCallback(e) === true) {
let ret = await cdpClient.send('Network.getResponseBodyForInterception', {
interceptionId: e.interceptionId
})
interceptCallback(ret, e.responseHeaders)
obj['errorReason'] = 'BlockedByClient'
}
await cdpClient.send('Network.continueInterceptedRequest', obj)
})
}
// NOTE(review): cdpClient is a module-level variable, not a field of this
// wrapper. Every PageWrapper that creates a page overwrites it, and the
// handler below reads the module-level variable when an event fires, so
// with several live wrappers it may address another page's session.
// Confirm whether this should be per-instance state.
cdpClient = await this.page.target().createCDPSession();
// Intercept every Document response at the HeadersReceived stage.
await cdpClient.send('Network.setRequestInterception', {
patterns: [{
urlPattern: '*',
resourceType: 'Document',
interceptionStage: 'HeadersReceived'
}],
})
await cdpClient.on('Network.requestIntercepted', async e => {
let obj = { interceptionId: e.interceptionId }
// Only an explicit `true` from the callback triggers a capture.
if (this.intrNeededCallback && this.intrNeededCallback(e) === true) {
let ret = await cdpClient.send('Network.getResponseBodyForInterception', {
interceptionId: e.interceptionId
})
this.intrCallback(ret, e.responseHeaders)
// A captured response is handed to the callback and the request itself
// is then continued with a BlockedByClient error reason.
obj['errorReason'] = 'BlockedByClient'
}
await cdpClient.send('Network.continueInterceptedRequest', obj)
})
return page
},
setProxy(proxy) {
options.args.push(`--proxy-server=${proxy}`)
},
disableSandbox() {
options.args.push(
'--no-sandbox',
'--disable-setuid-sandbox'
)
},
setHeadful() {
options.headless = false
return this.page
}
}
let singlePageWrapper = new PageWrapper()
module.exports = {
async launch(options) {
if (options.proxy)
chromeOptions.args.push(`--proxy-server=${options.proxy}`)
if (options.noSandbox)
chromeOptions.args.push(
'--no-sandbox',
'--disable-setuid-sandbox'
)
if (options.headful)
chromeOptions.headless = false
browser = await puppeteer.launch(chromeOptions)
},
singlePageWrapper,
PageWrapper,
}

View File

@ -1,20 +1,23 @@
const cookiesStorage = require('./cookies-storage')
const browser = require('./browser')
const {singlePageWrapper, PageWrapper} = browser
const os = require('os')
const path = require('path')
const argv = require('minimist')(process.argv.slice(2), {
string: ['retries', 'timeout', 'cookies', 'port', 'proxy'],
boolean: ['no-sandbox', 'headful'],
boolean: ['no-sandbox', 'headful', 'reuse'],
stopEarly: true,
default: {
port: 3000,
retries: 10,
timeout: 30000,
cookies: path.join(os.homedir(), '.rt-pupflare-cookies.json')
cookies: path.join(os.homedir(), '.rt-pupflare-cookies.json'),
reuse: false,
}
})
// --reuse: when set, requests share singlePageWrapper's page instead of
// creating a new PageWrapper per request.
let reusePage = argv.reuse

// Parse the --retries / --timeout values as decimal; an explicit radix keeps
// prefixed input such as "0x10" from being read as hexadecimal.
const maxTryCount = Number.parseInt(argv.retries, 10)
const loadingTimeout = Number.parseInt(argv.timeout, 10)
@ -34,30 +37,34 @@ router.get('/request', async (ctx, next) => {
data: ''
};
/*if (ctx.method === "POST") {
await page.removeAllListeners('request');
await page.setRequestInterception(true);
page.on('request', interceptedRequest => {
var data = {
'method': 'POST',
'postData': ctx.request.rawBody
};
interceptedRequest.continue(data);
});
}*/
let responseSet = false
let pageWrapper = null
await new Promise(async (resolve, reject) => {
const page = await browser.getPage(
(e) => e.isDownload === true,
(response, headers) => {
Object.assign(myResult, {
data: response.base64Encoded ? response.body : btoa(response.body),
binary: true,
headers
})
resolve()
const fInterceptionNeeded = (e) => e.isDownload === true
const fInterception = (response, headers) => {
Object.assign(myResult, {
data: response.base64Encoded ? response.body : btoa(response.body),
binary: true,
headers
})
resolve()
}
pageWrapper = reusePage ? singlePageWrapper : new PageWrapper()
const page = await pageWrapper.getPage(fInterceptionNeeded, fInterception)
// not tested
if (ctx.method === "POST") {
await page.removeAllListeners('request')
await page.setRequestInterception(true)
page.on('request', interceptedRequest => {
interceptedRequest.continue({
'method': 'POST',
'postData': ctx.request.rawBody
})
})
}
try {
let tryCount = 0
@ -96,6 +103,9 @@ router.get('/request', async (ctx, next) => {
if (!responseSet)
ctx.body = JSON.stringify(myResult)
if (!reusePage)
pageWrapper.page.close()
await next()
})
.get('/cookies', async (ctx, next) => {
@ -107,21 +117,17 @@ router.get('/request', async (ctx, next) => {
(async () => {
cookiesStorage.setFileName(argv.cookies)
// console.log(argv)
if (argv.proxy)
browser.setProxy(argv.proxy)
if (argv['no-sandbox'])
browser.disableSandbox()
if (argv.headful)
browser.setHeadful()
await browser.launch()
await browser.launch({
proxy: argv.proxy ?? null,
noSandbox: argv['no-sandbox'] ?? false,
headful: argv.headful ?? false,
})
app.use(router.routes())
.use(router.allowedMethods())
app.on('error', (error) => {
console.error(error)
console.error('[app.onerror]', error)
})
app.listen(parseInt(argv.port), '127.0.0.1')