diff --git a/packages/content-handler/src/index.ts b/packages/content-handler/src/index.ts index e6fa8c0cf..69ce3f362 100644 --- a/packages/content-handler/src/index.ts +++ b/packages/content-handler/src/index.ts @@ -37,6 +37,7 @@ import { WeixinQqHandler } from './websites/weixin-qq-handler' import { WikipediaHandler } from './websites/wikipedia-handler' import { YoutubeHandler } from './websites/youtube-handler' import { TheAtlanticHandler } from './websites/the-atlantic-handler' +import { ArsTechnicaHandler } from './websites/ars-technica-handler' const validateUrlString = (url: string): boolean => { const u = new URL(url) @@ -57,6 +58,7 @@ const validateUrlString = (url: string): boolean => { } const contentHandlers: ContentHandler[] = [ + new ArsTechnicaHandler(), new TheAtlanticHandler(), new AppleNewsHandler(), new BloombergHandler(), diff --git a/packages/content-handler/src/websites/ars-technica-handler.ts b/packages/content-handler/src/websites/ars-technica-handler.ts new file mode 100644 index 000000000..9091f4cf8 --- /dev/null +++ b/packages/content-handler/src/websites/ars-technica-handler.ts @@ -0,0 +1,86 @@ +import axios from 'axios' +import { parseHTML } from 'linkedom' +import { ContentHandler, PreHandleResult } from '../content-handler' + +/** + * Some of the content on Ars Technica is split over several pages. + * If this is the case we should unfurl the entire article into one. 
+ */ +export class ArsTechnicaHandler extends ContentHandler { + constructor() { + super() + this.name = 'ArsTechnica' + } + + shouldPreHandle(url: string): boolean { + const u = new URL(url) + return u.hostname.endsWith('arstechnica.com') + } + + hasMultiplePages(document: Document): boolean { + return document.querySelectorAll('nav.page-numbers')?.length != 0 + } + + async grabContentFromUrl(url: string): Promise<Document> { + const response = await axios.get(url) + const data = response.data as string + return parseHTML(data).document + } + + async extractArticleContentsFromLink(url: string): Promise<ChildNode[]> { + const dom = await this.grabContentFromUrl(url) + const articleContent = dom.querySelector('[itemprop="articleBody"]') + return [].slice.call(articleContent?.childNodes || []) + } + + async expandLinksAndCombine(document: Document): Promise<Document> { + const pageNumbers = document.querySelector('nav.page-numbers') + const articleBody = document.querySelector('[itemprop="articleBody"]') + + if (!pageNumbers || !articleBody) { + // We shouldn't ever really get here, but sometimes weird things happen. + return document + } + + const pageLinkNodes = pageNumbers.querySelectorAll('a') + // Remove the "Next" Link, as it will duplicate some content. + const pageLinks = + Array.from(pageLinkNodes) + ?.slice(0, pageLinkNodes.length - 1) + ?.map(({ href }) => href) ?? [] + + const pageContents = await Promise.all( + pageLinks.map(this.extractArticleContentsFromLink.bind(this)) + ) + + for (const articleContents of pageContents) { + // We place all the content in a span to indicate that a page has been parsed. + const span = document.createElement('SPAN') + span.className = 'nextPageContents' + span.append(...articleContents) + articleBody.append(span) + } + pageNumbers.remove() + + return document + } + + async preHandle(url: string): Promise<PreHandleResult> { + // We simply retrieve the article without Javascript enabled using a GET command. 
+ const dom = await this.grabContentFromUrl(url) + if (!this.hasMultiplePages(dom)) { + return { + content: dom.body.outerHTML, + title: dom.title, + dom, + } + } + + const expandedDom = await this.expandLinksAndCombine(dom) + return { + content: expandedDom.body.outerHTML, + title: dom.title, + dom: expandedDom, + } + } +} diff --git a/packages/content-handler/test/ars-technica.test.ts b/packages/content-handler/test/ars-technica.test.ts new file mode 100644 index 000000000..0d2575bc6 --- /dev/null +++ b/packages/content-handler/test/ars-technica.test.ts @@ -0,0 +1,82 @@ +import { ArsTechnicaHandler } from '../src/websites/ars-technica-handler' +import fs from 'fs' +import nock from 'nock' +import { expect } from 'chai' +import { parseHTML } from 'linkedom' + +describe('Testing parsing multi-page articles from arstechnica.', () => { + let orignalArticle: Document | undefined + let htmlPg1: string | null + let htmlPg2: string | null + let htmlPg3: string | null + + const load = (path: string): string => { + return fs.readFileSync(path, 'utf8') + } + + before(() => { + htmlPg1 = load('./test/data/ars-multipage/ars-technica-page-1.html') + htmlPg2 = load('./test/data/ars-multipage/ars-technica-page-2.html') + htmlPg3 = load('./test/data/ars-multipage/ars-technica-page-3.html') + + orignalArticle = parseHTML(htmlPg1).document + }) + + beforeEach(() => { + nock('https://arstechnica.com').get('/article/').reply(200, htmlPg1!) + nock('https://arstechnica.com').get('/article/2/').reply(200, htmlPg2!) + nock('https://arstechnica.com').get('/article/3/').reply(200, htmlPg3!) + }) + + afterEach(() => { + nock.cleanAll(); + }) + + it('should parse the title of the atlantic article.', async () => { + const response = await new ArsTechnicaHandler().preHandle( + 'https://arstechnica.com/article/' + ) + + // We grab the title from the document. 
+ expect(response.title).not.to.be.undefined + expect(response.title).to.equal( + 'What’s going on with the reports of a room-temperature superconductor? | Ars Technica' + ) + }) + + it('should remove the navigation links', async () => { + const response = await new ArsTechnicaHandler().preHandle( + 'https://arstechnica.com/article/' + ) + + expect(orignalArticle?.querySelector('nav.page-numbers')).not.to.be.null + expect(response.dom?.querySelectorAll('nav.page-numbers').length).to.equal(0); + }) + + it('should append all new content into the main article', async () => { + const response = await new ArsTechnicaHandler().preHandle( + 'https://arstechnica.com/article/' + ) + + // We name the div to ensure we can validate that it has been inserted. + expect( + orignalArticle?.getElementsByClassName('nextPageContents')?.length || 0 + ).to.equal(0) + expect( + response.dom?.getElementsByClassName('nextPageContents')?.length || 0 + ).not.to.equal(0) + }) + + it('should remove any related content links.', async () => { + const response = await new ArsTechnicaHandler().preHandle( + 'https://arstechnica.com/article/' + ) + + // This exists in the HTML, but we remove it when preparsing. + expect( + response.dom?.getElementsByClassName( + 'ArticleRelatedContentModule_root__BBa6g' + ).length + ).to.eql(0) + }) +}) diff --git a/packages/content-handler/test/data/ars-multipage/ars-technica-page-1.html b/packages/content-handler/test/data/ars-multipage/ars-technica-page-1.html new file mode 100644 index 000000000..8e97b4aa5 --- /dev/null +++ b/packages/content-handler/test/data/ars-multipage/ars-technica-page-1.html @@ -0,0 +1,855 @@ + + + + + What’s going on with the reports of a room-temperature superconductor? | Ars Technica + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + +
+
+
+
+

+ No answers yet — +

+

What’s going on with the reports of a room-temperature superconductor?

+

Rumors are flying of confirmation, but the situation is still frustratingly vague.

+
+
+
+ +
+ Pellet of LK-99 being repelled by a magnet. +
Enlarge / Pellet of LK-99 being repelled by a magnet.
+ + + + + + + +

In late July, a couple of startling papers appeared on the arXiv, a repository of pre-peer-review manuscripts on topics in physics and astronomy. The papers claim to describe the synthesis of a material that is not only able to superconduct above room temperature, but also above the boiling point of water. And it does so at normal atmospheric pressures.

+

Instead of having to build upon years of work with exotic materials that only work under extreme conditions, the papers seem to describe a material that could be made via some relatively straightforward chemistry and would work if you set it on your desk. It was like finding a shortcut to a material that would revolutionize society.

+

The perfect time to write an article on those results would be when they've been confirmed by multiple labs. But these are not perfect times. Instead, rumors seem to be flying daily about possible confirmation, confusing and contradictory results, and informed discussions of why this material either should or shouldn't work.

+

In this article, we'll explain where things stand and why getting to a place of clarity will be challenging, even if these claims are right.

+

What’s the original claim?

+

The more detailed of the two manuscripts describes how to make the material and measurements of its property. The material itself is a variation of a well-known chemical called lead apatite. Apatites are a class of chemicals that form similar crystal structures; this particular version is primarily composed of lead and phosphate groups—all of its constituents are cheap and readily available.

+ +

The version developed here, which has been termed LK-99, was made by reacting a lead sulfate with a copper-phosphorus compound (the reaction requires high temperatures for over a day under a vacuum). This strips the phosphorus from the copper, oxidizes it, and allows it to displace the sulfur from its compound with the lead. Critically, though, some fraction of the lead itself ends up replaced by copper in the resulting compound.

+

This has a significant impact on the apatite crystal structure because copper is quite a bit smaller than lead. The researchers claim the overall volume of the sample drops by about half of a percentage as a result, and that change is accompanied by shifts in the orientation of various atoms and bonds. That means changes in where the electrons reside within the material.

+

That change appears to be critical to the LK-99's behavior. Superconductivity is associated with a number of very specific properties, and the researchers measure two of them: the expulsion of magnetic field lines (called the Meissner effect) and the existence of a critical temperature at which conductivity changes.

+

It's hard to explain just how strange these experiments are. Under normal circumstances, the superconducting material starts out behaving as a normal chemical and has to be cooled down to the critical point where exceptional behavior emerges. LK-99, by contrast, starts out superconducting and has to be heated beyond the boiling point of water to reach its critical temperature.

+

The only somewhat strange result here comes at temperatures just below the critical temperature. At room temperature and above, the resistance of LK-99 remains at zero as far as the testing equipment is able to measure. But it starts to rise ever so slightly once temperatures reach 60°C and displays a smooth upward slope until the sample hits 90°C, at which point it stays flat until the critical temperature is reached. The researchers did not attempt to explain this.

+ +
+ + + + +
+
+
+
+ + + +
+ + + +
+
+
+ +
+
+ + +
+
+
+
+ + +
+
+ + +
+ + + + +
+ +
+
+
+
+ +
+
+
+
+ +
+
+
+
+
+
+

Channel Ars Technica

+
+
+
+ +
+ +
+ + +
+
+
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/packages/content-handler/test/data/ars-multipage/ars-technica-page-2.html b/packages/content-handler/test/data/ars-multipage/ars-technica-page-2.html new file mode 100644 index 000000000..2f8cc0b95 --- /dev/null +++ b/packages/content-handler/test/data/ars-multipage/ars-technica-page-2.html @@ -0,0 +1,841 @@ + + + + + What’s going on with the reports of a room-temperature superconductor? | Ars Technica + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + +
+
+
+
+

+ No answers yet — +

+

What’s going on with the reports of a room-temperature superconductor?

+

Rumors are flying of confirmation, but the situation is still frustratingly vague.

+
+
+
+ + + + + + +

What about those other superconductors?

+

The news comes at a somewhat awkward time for the field. A similar claim was made about a high-pressure material a few years ago, but that paper ended up being retracted because of problems with some of its data. The same research group came back with a different material that was said to work at room temperature, but that work hasn't been replicated, and the head of the lab has now been accused of scientific misconduct.

+

There is almost no overlap between that work and LK-99. None of the people involved are the same, so there's no reason to suspect problematic research practices. And the chemistry and physics involved are completely different. The earlier work used high pressure to create chemicals with lots of hydrogen and unusual orbital structures. LK-99 uses no hydrogen at all and gets its orbital structures via a conformational change in a crystal lattice that takes place at ambient pressures.

+

Hydrogen was the focus of the earlier work because its low atomic weight influences the behavior of vibrations within the material in a way that promotes the formation of superconducting pairs of electrons. The mechanism behind LK-99 is less clear, but clearly not that.

+

(The LK-99's creators suggest that the conformational change in the crystal creates a sort of standing wave of electrons called a "charge density wave," and superconductivity involves electrons tunneling between wave sites. The modeling paper, by contrast, suggests that giving electrons the opportunity to both superconduct and participate in additional processes like charge density wave formation increases the probability that they'll superconduct. In any case, neither idea involves phonons.)

+ +

So when will we actually know anything?

+

Hopefully soon. The researchers behind the original report are trying to get information out there. In addition to the drafts placed in the arXiv, they have already published a paper on LK-99, albeit in their native Korean. And a group of South Korean scientists working in the field have also announced that they're going to obtain LK-99 samples and try to confirm its reported behavior.

+

There's also lots of activity outside of South Korea. Producing LK-99 is within reach of a lot of labs, and testing it is much easier since it doesn't require low temperatures or high pressures. That will mean a lot of short-term confusion, but it's likely to enable a consensus to emerge sooner.

+

Whether or not this chemical superconducts at ambient temperatures might not be the final question, though. Assuming it does, there will be many questions about how to develop it into a useful material, how much current it can carry, and how to use it most effectively in the huge range of applications it can be put to. But I'm sure we'll all be happy if we end up needing answers to those questions.

+ +
+ + + + +
+
+
+
+ + + +
+ + + +
+
+
+ +
+
+ + +
+
+
+
+ + +
+
+ + +
+ + + + +
+ +
+
+
+
+ +
+
+
+
+ +
+
+
+
+
+
+

Channel Ars Technica

+
+
+
+ +
+ +
+ + +
+
+
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/packages/content-handler/test/data/ars-multipage/ars-technica-page-3.html b/packages/content-handler/test/data/ars-multipage/ars-technica-page-3.html new file mode 100644 index 000000000..40b431ef4 --- /dev/null +++ b/packages/content-handler/test/data/ars-multipage/ars-technica-page-3.html @@ -0,0 +1,841 @@ + + + + + What’s going on with the reports of a room-temperature superconductor? | Ars Technica + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + +
+
+
+
+

+ No answers yet — +

+

What’s going on with the reports of a room-temperature superconductor?

+

Rumors are flying of confirmation, but the situation is still frustratingly vague.

+
+
+
+ + + + + + +

What about those other superconductors?

+

The news comes at a somewhat awkward time for the field. A similar claim was made about a high-pressure material a few years ago, but that paper ended up being retracted because of problems with some of its data. The same research group came back with a different material that was said to work at room temperature, but that work hasn't been replicated, and the head of the lab has now been accused of scientific misconduct.

+

There is almost no overlap between that work and LK-99. None of the people involved are the same, so there's no reason to suspect problematic research practices. And the chemistry and physics involved are completely different. The earlier work used high pressure to create chemicals with lots of hydrogen and unusual orbital structures. LK-99 uses no hydrogen at all and gets its orbital structures via a conformational change in a crystal lattice that takes place at ambient pressures.

+

Hydrogen was the focus of the earlier work because its low atomic weight influences the behavior of vibrations within the material in a way that promotes the formation of superconducting pairs of electrons. The mechanism behind LK-99 is less clear, but clearly not that.

+

(The LK-99's creators suggest that the conformational change in the crystal creates a sort of standing wave of electrons called a "charge density wave," and superconductivity involves electrons tunneling between wave sites. The modeling paper, by contrast, suggests that giving electrons the opportunity to both superconduct and participate in additional processes like charge density wave formation increases the probability that they'll superconduct. In any case, neither idea involves phonons.)

+ +

So when will we actually know anything?

+

Hopefully soon. The researchers behind the original report are trying to get information out there. In addition to the drafts placed in the arXiv, they have already published a paper on LK-99, albeit in their native Korean. And a group of South Korean scientists working in the field have also announced that they're going to obtain LK-99 samples and try to confirm its reported behavior.

+

There's also lots of activity outside of South Korea. Producing LK-99 is within reach of a lot of labs, and testing it is much easier since it doesn't require low temperatures or high pressures. That will mean a lot of short-term confusion, but it's likely to enable a consensus to emerge sooner.

+

Whether or not this chemical superconducts at ambient temperatures might not be the final question, though. Assuming it does, there will be many questions about how to develop it into a useful material, how much current it can carry, and how to use it most effectively in the huge range of applications it can be put to. But I'm sure we'll all be happy if we end up needing answers to those questions.

+ +
+ + + + +
+
+
+
+ + + +
+ + + +
+
+
+ +
+
+ + +
+
+
+
+ + +
+
+ + +
+ + + + +
+ +
+
+
+
+ +
+
+
+
+ +
+
+
+
+
+
+

Channel Ars Technica

+
+
+
+ +
+ +
+ + +
+
+
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file