Get tweet reply ids from thread id using puppeteer
This commit is contained in:
@ -32,6 +32,7 @@
|
||||
"axios": "^0.27.2",
|
||||
"linkedom": "^0.14.16",
|
||||
"luxon": "^3.0.4",
|
||||
"puppeteer-core": "^19.1.1",
|
||||
"rfc2047": "^4.0.1",
|
||||
"underscore": "^1.13.6",
|
||||
"uuid": "^9.0.0"
|
||||
|
||||
@ -2,6 +2,7 @@ import { ContentHandler, PreHandleResult } from '../content-handler'
|
||||
import axios from 'axios'
|
||||
import { DateTime } from 'luxon'
|
||||
import _ from 'underscore'
|
||||
import puppeteer from 'puppeteer-core'
|
||||
|
||||
interface TweetIncludes {
|
||||
users: {
|
||||
@ -134,87 +135,93 @@ const formatTimestamp = (timestamp: string) => {
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * Runs in the Node process only: it is not available inside functions handed
 * to `page.evaluate`, because those are executed in the browser page.
 *
 * @param ms number of milliseconds to wait before the promise settles
 * @returns a promise that resolves (with no value) once the timer fires
 */
const waitFor = (ms: number): Promise<void> =>
  new Promise<void>((resolve) => {
    setTimeout(resolve, ms)
  })
|
||||
|
||||
/**
 * Collect the ids of the tweets shown on a thread's status page by driving a
 * headless browser with puppeteer (the original note states this also works
 * for tweets older than 7 days).
 *
 * NOTE(review): `puppeteer-core` does not ship a browser binary; confirm the
 * runtime provides one (for example through `executablePath`), otherwise
 * `launch` will fail.
 *
 * NOTE(review): the scraped ids are not de-duplicated, so `tweetId` may appear
 * again after the first element if the page renders a link for it.
 *
 * @param tweetId id of the tweet whose status page is opened
 * @returns `tweetId` followed by every id scraped from the page, in document order
 * @throws whatever puppeteer throws on launch, navigation or evaluation; the
 *         browser is closed before the error propagates
 */
const getTweetIdsFromThreadId = async (tweetId: string): Promise<string[]> => {
  const pageURL = `https://twitter.com/anyone/status/${tweetId}`

  // Modify this variable to control the size of viewport
  const factor = 0.2
  const height = Math.floor(2000 / factor)
  const width = Math.floor(1700 / factor)

  const browser = await puppeteer.launch({
    headless: true,
    defaultViewport: {
      width,
      height,
    },
    args: [
      `--force-device-scale-factor=${factor}`,
      `--window-size=${width},${height}`,
    ],
  })

  try {
    const page = await browser.newPage()

    await page.goto(pageURL, {
      waitUntil: 'networkidle2',
      timeout: 5 * 60 * 1000,
    })

    // Extra settle time after the network goes idle, before the DOM is read.
    await waitFor(4000)

    const tweetIds = await page.evaluate(async () => {
      // This callback is serialised and executed inside the page, so it cannot
      // see Node-side helpers such as the module-level `waitFor`; the delay
      // helper has to be declared here.
      const pause = (ms: number): Promise<void> =>
        new Promise<void>((resolve) => {
          setTimeout(resolve, ms)
        })

      const ids: string[] = []

      // Find the first "Show replies" button and click it
      const showRepliesButton = Array.from(
        document.querySelectorAll('div[dir="auto"]')
      )
        .filter(
          (node) => node.children[0] && node.children[0].tagName === 'SPAN'
        )
        .find((node) => node.children[0].innerHTML === 'Show replies')

      if (showRepliesButton instanceof HTMLElement) {
        showRepliesButton.click()

        // Give the page time to render the expanded replies.
        await pause(2000)
      }

      for (const timeNode of Array.from(document.querySelectorAll('time'))) {
        const timeContainer = timeNode.parentElement

        // Only a timestamp wrapped in a link carries a status URL; anything
        // else (a <span> wrapper, a detached node) has no href to read.
        if (!(timeContainer instanceof HTMLAnchorElement)) continue

        // The id is the last path segment of the status link.
        const id = timeContainer.href.split('/').pop()

        if (id) ids.push(id)
      }

      return ids
    })

    return [tweetId, ...tweetIds]
  } finally {
    // Always release the browser process, even when navigation or scraping fails.
    await browser.close()
  }
}
|
||||
|
||||
export class TwitterHandler extends ContentHandler {
|
||||
// Initialise the base ContentHandler first, then label this handler 'Twitter'.
constructor() {
  super()

  this.name = 'Twitter'
}
|
||||
|
||||
/**
 * Collect the ids of the tweets shown on a thread's status page by driving a
 * headless browser with puppeteer.
 *
 * NOTE(review): the scraped ids are not de-duplicated, so `tweetID` may appear
 * again after the first element if the page renders a link for it.
 *
 * @param tweetID id of the tweet whose status page is opened
 * @returns `tweetID` followed by every id scraped from the page, in document order
 * @throws whatever puppeteer throws on launch, navigation or evaluation; the
 *         browser is closed before the error propagates
 */
async getTweetIdsFromThreadId(tweetID: string): Promise<string[]> {
  const pageURL = `https://twitter.com/anyone/status/${tweetID}`

  // Modify this variable to control the size of viewport
  const factor = 0.2
  const height = Math.floor(2000 / factor)
  const width = Math.floor(1700 / factor)

  const browser = await puppeteer.launch({
    headless: true,
    defaultViewport: {
      width,
      height,
    },
    args: [
      `--force-device-scale-factor=${factor}`,
      `--window-size=${width},${height}`,
    ],
  })

  try {
    const page = await browser.newPage()

    await page.goto(pageURL, {
      waitUntil: 'networkidle2',
      timeout: 5 * 60 * 1000,
    })

    // Extra settle time after the network goes idle, before the DOM is read.
    await waitFor(4000)

    const tweetIDs = await page.evaluate(async () => {
      const ids: string[] = []

      // Declared inside the callback because this code runs in the page, where
      // Node-side helpers are not in scope.
      const pause = (ms: number): Promise<void> =>
        new Promise<void>((resolve) => {
          setTimeout(resolve, ms)
        })

      // Find the first "Show replies" button and click it
      const showRepliesButton = Array.from(
        document.querySelectorAll('div[dir="auto"]')
      )
        .filter(
          (node) => node.children[0] && node.children[0].tagName === 'SPAN'
        )
        .find((node) => node.children[0].innerHTML === 'Show replies')

      if (showRepliesButton instanceof HTMLElement) {
        showRepliesButton.click()

        // Give the page time to render the expanded replies.
        await pause(2000)
      }

      for (const timeNode of Array.from(document.querySelectorAll('time'))) {
        const timeContainer = timeNode.parentElement

        // Only a timestamp wrapped in a link carries a status URL; anything
        // else (a <span> wrapper, a detached node) has no href to read.
        if (!(timeContainer instanceof HTMLAnchorElement)) continue

        // The id is the last path segment of the status link.
        const id = timeContainer.href.split('/').pop()

        if (id) ids.push(id)
      }

      return ids
    })

    return [tweetID, ...tweetIDs]
  } finally {
    // Always release the browser process, even when navigation or scraping fails.
    await browser.close()
  }
}
|
||||
|
||||
/**
 * Decide whether this handler should pre-handle the given URL.
 *
 * @param url the URL to check
 * @returns `true` only when `TWITTER_BEARER_TOKEN` is truthy and the URL
 *          matches `TWITTER_URL_MATCH`
 */
shouldPreHandle(url: string): boolean {
  // Without a bearer token the handler never claims a URL.
  if (!TWITTER_BEARER_TOKEN) {
    return false
  }

  return TWITTER_URL_MATCH.test(url.toString())
}
|
||||
|
||||
26
yarn.lock
26
yarn.lock
@ -12333,6 +12333,11 @@ devtools-protocol@0.0.1019158:
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1019158.tgz#4b08d06108a784a2134313149626ba55f030a86f"
|
||||
integrity sha512-wvq+KscQ7/6spEV7czhnZc9RM/woz1AY+/Vpd8/h2HFMwJSdTliu7f/yr1A6vDdJfKICZsShqsYpEQbdhg8AFQ==
|
||||
|
||||
devtools-protocol@0.0.1045489:
|
||||
version "0.0.1045489"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1045489.tgz#f959ad560b05acd72d55644bc3fb8168a83abf28"
|
||||
integrity sha512-D+PTmWulkuQW4D1NTiCRCFxF7pQPn0hgp4YyX4wAQ6xYXKOadSWPR3ENGDQ47MW/Ewc9v2rpC/UEEGahgBYpSQ==
|
||||
|
||||
devtools-protocol@0.0.901419:
|
||||
version "0.0.901419"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.901419.tgz#79b5459c48fe7e1c5563c02bd72f8fec3e0cebcd"
|
||||
@ -21372,6 +21377,22 @@ puppeteer-core@^16.1.0:
|
||||
unbzip2-stream "1.4.3"
|
||||
ws "8.8.1"
|
||||
|
||||
puppeteer-core@^19.1.1:
|
||||
version "19.1.1"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-19.1.1.tgz#6416ff925a9cc78523c490482a17a2998f7c0626"
|
||||
integrity sha512-jV26Ke0VFel4MoXLjqm50uAW2uwksTP6Md1tvtXqWqXM5FyboKI6E9YYJ1qEQilUAqlhgGq8xLN5+SL8bPz/kw==
|
||||
dependencies:
|
||||
cross-fetch "3.1.5"
|
||||
debug "4.3.4"
|
||||
devtools-protocol "0.0.1045489"
|
||||
extract-zip "2.0.1"
|
||||
https-proxy-agent "5.0.1"
|
||||
proxy-from-env "1.1.0"
|
||||
rimraf "3.0.2"
|
||||
tar-fs "2.1.1"
|
||||
unbzip2-stream "1.4.3"
|
||||
ws "8.9.0"
|
||||
|
||||
puppeteer@^10.1.0:
|
||||
version "10.4.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-10.4.0.tgz#a6465ff97fda0576c4ac29601406f67e6fea3dc7"
|
||||
@ -25896,6 +25917,11 @@ ws@8.8.1, ws@^8.2.3, ws@^8.3.0, ws@^8.4.2:
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.8.1.tgz#5dbad0feb7ade8ecc99b830c1d77c913d4955ff0"
|
||||
integrity sha512-bGy2JzvzkPowEJV++hF07hAD6niYSr0JzBNo/J29WsB57A2r7Wlc1UFcTR9IzrPvuNVO4B8LGqF8qcpsVOhJCA==
|
||||
|
||||
ws@8.9.0:
|
||||
version "8.9.0"
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.9.0.tgz#2a994bb67144be1b53fe2d23c53c028adeb7f45e"
|
||||
integrity sha512-Ja7nszREasGaYUYCI2k4lCKIRTt+y7XuqVoHR44YpI49TtryyqbqvDMn5eqfW7e6HzTukDRIsXqzVHScqRcafg==
|
||||
|
||||
"ws@^5.2.0 || ^6.0.0 || ^7.0.0", ws@^7.3.1, ws@^7.4.6:
|
||||
version "7.5.7"
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.7.tgz#9e0ac77ee50af70d58326ecff7e85eb3fa375e67"
|
||||
|
||||
Reference in New Issue
Block a user