diff --git a/packages/content-handler/src/websites/github-handler.ts b/packages/content-handler/src/websites/github-handler.ts
index c31fe2224..92da72bcd 100644
--- a/packages/content-handler/src/websites/github-handler.ts
+++ b/packages/content-handler/src/websites/github-handler.ts
@@ -12,10 +12,29 @@ export class GitHubHandler extends ContentHandler {
   async preParse(url: string, dom: Document): Promise {
     const body = dom.querySelector('body')
+    const head = dom.querySelector('head')
     const article = dom.querySelector('article')
+    const twitterTitle = dom.querySelector(`meta[name='twitter:title']`)
+    const linkAuthor = dom.querySelector(`span[itemprop='author']`)
 
     if (body && article) {
-      body?.replaceChildren(article)
+      body.replaceChildren(article)
+
+      // Attempt to set the author as well. The author span is available on
+      // repo homepages but not on things like PRs; ideally PRs and issues
+      // would use their own author. Copied as plain text, not parsed as HTML.
+      if (linkAuthor && linkAuthor.textContent) {
+        const author = dom.createElement('span')
+        author.setAttribute('rel', 'author')
+        author.textContent = linkAuthor.textContent
+        article.appendChild(author)
+      }
+    }
+
+    // Strip the "GitHub - <org>/" prefix from the title; stop at the first slash
+    const twitterTitleContent = twitterTitle?.getAttribute('content')
+    if (twitterTitle && twitterTitleContent) {
+      twitterTitle.setAttribute('content', twitterTitleContent.replace(/GitHub - [^/]*\//, ''))
     }
 
     return Promise.resolve(dom)