nice
This commit is contained in:
1
raypeat-articles/articles.html
Normal file
1
raypeat-articles/articles.html
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
485
raypeat-articles/articles.ts
Normal file
485
raypeat-articles/articles.ts
Normal file
@@ -0,0 +1,485 @@
|
||||
/**
 * Catalogue of raypeat.com articles to scrape.
 *
 * Each entry has:
 * - `title`: the article title as listed on the site,
 * - `url`:   the absolute URL of the article,
 * - `slug`:  the base name used for the article's dump file (`<slug>.html`).
 *
 * Slugs are used as file names, so they must be unique and must not contain
 * path separators. Every slug is the lower-cased base name of the URL
 * without its extension.
 */
const articles = [
  {
    title: "TSH, temperature, pulse rate, and other indicators in hypothyroidism",
    url: "http://raypeat.com/articles/articles/hypothyroidism.shtml",
    slug: "hypothyroidism",
  },
  {
    title: "Academic authoritarians, language, metaphor, animals, and science",
    url: "http://raypeat.com/articles/articles/authoritarians.shtml",
    slug: "authoritarians",
  },
  {
    title: "Adaptive substance, creative regeneration: Mainstream science, repression, and creativity",
    url: "http://raypeat.com/articles/articles/adaptive-substance.shtml",
    slug: "adaptive-substance",
  },
  {
    title: "Aging Eyes, Infant Eyes, and Excitable Tissues",
    url: "http://raypeat.com/articles/articles/aging-eyes.shtml",
    slug: "aging-eyes",
  },
  {
    title: "Aging, estrogen, and progesterone.",
    url: "http://raypeat.com/articles/aging/aging-estrogen-progesterone.shtml",
    slug: "aging-estrogen-progesterone",
  },
  {
    title: "Altitude and Mortality.",
    url: "http://raypeat.com/articles/aging/altitude-mortality.shtml",
    slug: "altitude-mortality",
  },
  {
    title: "Alzheimer's: The problem of Alzheimer's disease as a clue to immortality - part 1.",
    url: "http://raypeat.com/articles/articles/alzheimers.shtml",
    slug: "alzheimers",
  },
  {
    title: "Alzheimer's: The problem of Alzheimer's disease as a clue to immortality - part 2.",
    url: "http://raypeat.com/articles/articles/alzheimers2.shtml",
    slug: "alzheimers2",
  },
  {
    title: "Aspirin, brain and cancer.",
    url: "http://raypeat.com/articles/aging/aspirin-brain-cancer.shtml",
    slug: "aspirin-brain-cancer",
  },
  {
    title: "Autonomic systems.",
    url: "http://raypeat.com/articles/other/autonomic-systems.shtml",
    slug: "autonomic-systems",
  },
  {
    title: "Bleeding, clotting, cancer.",
    url: "http://raypeat.com/articles/aging/bleeding-clotting-cancer.shtml",
    slug: "bleeding-clotting-cancer",
  },
  {
    title: "Blocking Tissue Destruction.",
    url: "http://raypeat.com/articles/articles/tissue-destruction.shtml",
    slug: "tissue-destruction",
  },
  {
    title: "Bone Density: First Do No Harm.",
    url: "http://raypeat.com/articles/aging/bonedensity.shtml",
    slug: "bonedensity",
  },
  {
    title: "Breast Cancer.",
    url: "http://raypeat.com/articles/aging/breastcancer.shtml",
    slug: "breastcancer",
  },
  {
    title: 'BSE ("mad cow"), scrapie, etc.: Stimulated amyloid degeneration and the toxic fats.',
    url: "http://raypeat.com/articles/aging/madcow.shtml",
    slug: "madcow",
  },
  {
    title:
      "Caffeine: A vitamin-like nutrient, or adaptogen. Questions about tea and coffee, cancer and other degenerative diseases, and the hormones.",
    url: "http://raypeat.com/articles/articles/caffeine.shtml",
    slug: "caffeine",
  },
  {
    title: "Calcium and Disease: Hypertension, organ calcification, & shock, vs. respiratory energy",
    url: "http://raypeat.com/articles/articles/calcium.shtml",
    slug: "calcium",
  },
  {
    title: "Cancer: Disorder and Energy",
    url: "http://raypeat.com/articles/articles/cancer-disorder-energy.shtml",
    slug: "cancer-disorder-energy",
  },
  {
    title: "Cascara, energy, cancer and the FDA's laxative abuse",
    url: "http://raypeat.com/articles/articles/cascara-energy-cancer-fda-laxative-abuse.shtml",
    slug: "cascara-energy-cancer-fda-laxative-abuse",
  },
  {
    title: "Cataracts: water, energy, light, and aging",
    url: "http://raypeat.com/articles/articles/cataracts-water-energy-light-aging.shtml",
    slug: "cataracts-water-energy-light-aging",
  },
  {
    title: "Cholesterol, longevity, intelligence, and health.",
    url: "http://raypeat.com/articles/articles/cholesterol-longevity.shtml",
    slug: "cholesterol-longevity",
  },
  {
    title: "Coconut Oil.",
    url: "http://raypeat.com/articles/articles/coconut-oil.shtml",
    slug: "coconut-oil",
  },
  {
    title: "Diabetes, scleroderma, oils and hormones.",
    url: "http://raypeat.com/articles/articles/diabetes.shtml",
    slug: "diabetes",
  },
  {
    title: "Eclampsia in the Real Organism: A Paradigm of General Distress Applicable in Infants, Adults, Etc.",
    url: "http://raypeat.com/articles/aging/eclampsia.shtml",
    slug: "eclampsia",
  },
  {
    title: "Epilepsy and Progesterone.",
    url: "http://raypeat.com/articles/articles/epilepsy-progesterone.shtml",
    slug: "epilepsy-progesterone",
  },
  {
    title: "Estriol, DES, DDT, etc.",
    url: "http://raypeat.com/articles/aging/estriol-des-ddt.shtml",
    slug: "estriol-des-ddt",
  },
  {
    title: "Estrogen - Age Stress Hormone.",
    url: "http://raypeat.com/articles/articles/estrogen-age-stress.shtml",
    slug: "estrogen-age-stress",
  },
  {
    title: "Estrogen and Osteoporosis.",
    url: "http://raypeat.com/articles/articles/estrogen-osteoporosis.shtml",
    slug: "estrogen-osteoporosis",
  },
  {
    // NOTE(review): this URL points at a PDF, not an HTML page; confirm the
    // scraper is expected to handle it.
    title: "Estrogen Receptors - what do they explain?",
    url: "http://raypeat.com/articles/articles/pdf/Estrogen-Receptors-what-do-they-explain.pdf",
    slug: "estrogen-receptors-what-do-they-explain",
  },
  {
    title: "Estrogen, memory and heredity: Imprinting and the stress response",
    url: "http://raypeat.com/articles/articles/imprinting.shtml",
    slug: "imprinting",
  },
  {
    title: "Estrogen, progesterone, and cancer: Conflicts of interest in regulation and product promotion",
    url: "http://raypeat.com/articles/articles/estrogen-progesterone-cancer.shtml",
    slug: "estrogen-progesterone-cancer",
  },
  {
    title: "Fatigue, aging, and recuperation",
    url: "http://raypeat.com/articles/articles/fatigue-aging-recuperation.shtml",
    slug: "fatigue-aging-recuperation",
  },
  {
    title: "Fats and degeneration.",
    url: "http://raypeat.com/articles/articles/fats-degeneration3.shtml",
    slug: "fats-degeneration3",
  },
  {
    title: "Fats, functions & malfunctions",
    url: "http://raypeat.com/articles/articles/fats-functions-malfunctions.shtml",
    slug: "fats-functions-malfunctions",
  },
  {
    title: "Food-junk and some mystery ailments: Fatigue, Alzheimer's, Colitis, Immunodeficiency.",
    url: "http://raypeat.com/articles/nutrition/carrageenan.shtml",
    slug: "carrageenan",
  },
  {
    title: "Gelatin, stress, longevity",
    url: "http://raypeat.com/articles/articles/gelatin.shtml",
    slug: "gelatin",
  },
  {
    title: "Genes, Carbon Dioxide and Adaptation",
    url: "http://raypeat.com/articles/articles/genes-carbon-dioxide-adaptation.shtml",
    slug: "genes-carbon-dioxide-adaptation",
  },
  {
    title: "Glucose and sucrose for diabetes",
    url: "http://raypeat.com/articles/articles/glucose-sucrose-diabetes.shtml",
    slug: "glucose-sucrose-diabetes",
  },
  {
    title: "Glycemia, starch, and sugar in context",
    url: "http://raypeat.com/articles/articles/glycemia.shtml",
    slug: "glycemia",
  },
  {
    title: "Growth hormone: Hormone of Stress, Aging, and Death?",
    url: "http://raypeat.com/articles/articles/growth-hormone.shtml",
    slug: "growth-hormone",
  },
  {
    title: "Heart and hormones",
    url: "http://raypeat.com/articles/articles/heart-hormones.shtml",
    slug: "heart-hormones",
  },
  {
    title: "Hot flashes, energy, and aging",
    url: "http://raypeat.com/articles/articles/hot-flashes-energy-aging.shtml",
    slug: "hot-flashes-energy-aging",
  },
  {
    title: "How do you know? Students, patients, and discovery",
    url: "http://raypeat.com/articles/articles/howdoyouknow.shtml",
    slug: "howdoyouknow",
  },
  {
    title: "Immunodeficiency, dioxins, stress, and the hormones.",
    url: "http://raypeat.com/articles/articles/immunodeficiency.shtml",
    slug: "immunodeficiency",
  },
  {
    title: "Intelligence and metabolism",
    url: "http://raypeat.com/articles/articles/intelligence.shtml",
    slug: "intelligence",
  },
  {
    title: "Intuitive knowledge and its development",
    url: "http://raypeat.com/articles/articles/intuitive-knowledge.shtml",
    slug: "intuitive-knowledge",
  },
  {
    title: "Iron's Dangers.",
    url: "http://raypeat.com/articles/articles/iron-dangers.shtml",
    slug: "iron-dangers",
  },
  {
    title: "Lactate vs. CO2 in wounds, sickness, and aging; the other approach to cancer",
    url: "http://raypeat.com/articles/articles/lactate.shtml",
    slug: "lactate",
  },
  {
    title: "Leakiness, aging, and cancer.",
    url: "http://raypeat.com/articles/articles/leakiness.shtml",
    slug: "leakiness",
  },
  {
    title: "Meat physiology, stress, and degenerative physiology",
    url: "http://raypeat.com/articles/articles/meat-physiology-stress.shtml",
    slug: "meat-physiology-stress",
  },
  {
    title: "Membranes, plasma membranes, and surfaces",
    url: "http://raypeat.com/articles/articles/membranes.shtml",
    slug: "membranes",
  },
  {
    title: "Menopause and its causes.",
    url: "http://raypeat.com/articles/articles/menopause.shtml",
    slug: "menopause",
  },
  {
    title: "Milk in context: allergies, ecology, and some myths",
    url: "http://raypeat.com/articles/articles/milk.shtml",
    slug: "milk",
  },
  {
    title: "Mitochondria and mortality",
    url: "http://raypeat.com/articles/articles/mitochondria-mortality.shtml",
    slug: "mitochondria-mortality",
  },
  {
    title: "Multiple Sclerosis and other hormone related brain syndromes",
    url: "http://raypeat.com/articles/articles/multiple-sclerosis-hormone-related-brain-syndromes.shtml",
    slug: "multiple-sclerosis-hormone-related-brain-syndromes",
  },
  {
    title: "Multiple Sclerosis and other hormone-related brain syndromes",
    url: "http://raypeat.com/articles/articles/multiple-sclerosis.shtml",
    slug: "multiple-sclerosis",
  },
  {
    title: "Multiple sclerosis, protein, fats, and progesterone",
    url: "http://raypeat.com/articles/articles/ms.shtml",
    slug: "ms",
  },
  {
    title: "Natural Estrogens",
    url: "http://raypeat.com/articles/articles/natural-estrogens.shtml",
    slug: "natural-estrogens",
  },
  {
    title: "Oils in Context.",
    url: "http://raypeat.com/articles/nutrition/oils-in-context.shtml",
    slug: "oils-in-context",
  },
  {
    title: "Osteoporosis, aging, tissue renewal, and product science",
    url: "http://raypeat.com/articles/articles/osteoporosis-aging.shtml",
    slug: "osteoporosis-aging",
  },
  {
    title: "Osteoporosis, harmful calcification, and nerve/muscle malfunctions.",
    url: "http://raypeat.com/articles/articles/osteoporosis.shtml",
    slug: "osteoporosis",
  },
  {
    title: "Pathological Science & General Electric: Threatening the paradigm",
    url: "http://raypeat.com/articles/articles/pathological-science-general-electric.shtml",
    slug: "pathological-science-general-electric",
  },
  {
    title: "Phosphate, activation, and aging",
    url: "http://raypeat.com/articles/articles/phosphate-activation-aging.shtml",
    slug: "phosphate-activation-aging",
  },
  {
    title: "Physiology texts and the real world",
    url: "http://raypeat.com/articles/articles/physiology-texts-and-the-real-world.shtml",
    slug: "physiology-texts-and-the-real-world",
  },
  {
    title: "Preventing and treating cancer with progesterone.",
    url: "http://raypeat.com/articles/articles/cancer-progesterone.shtml",
    slug: "cancer-progesterone",
  },
  {
    title: "Progesterone Deceptions",
    url: "http://raypeat.com/articles/articles/progesterone-deceptions.shtml",
    slug: "progesterone-deceptions",
  },
  {
    title: "Progesterone Pregnenolone & DHEA - Three Youth-Associated Hormones.",
    url: "http://raypeat.com/articles/articles/three-hormones.shtml",
    slug: "three-hormones",
  },
  {
    title: "Progesterone Summaries",
    url: "http://raypeat.com/articles/articles/progesterone-summaries.shtml",
    slug: "progesterone-summaries",
  },
  {
    title: "Progesterone, not estrogen, is the coronary protection factor of women.",
    url: "http://raypeat.com/articles/aging/coronaryprogesterone.shtml",
    slug: "coronaryprogesterone",
  },
  {
    title: "Prostate Cancer",
    url: "http://raypeat.com/articles/articles/prostate-cancer.shtml",
    slug: "prostate-cancer",
  },
  {
    title: "Protective CO2 and aging",
    url: "http://raypeat.com/articles/articles/co2.shtml",
    slug: "co2",
  },
  {
    title: "Protective CO2 and aging",
    url: "http://raypeat.com/articles/articles/protective-co2-aging.shtml",
    slug: "protective-co2-aging",
  },
  {
    title: "Regeneration and degeneration: Types of inflammation change with aging",
    url: "http://raypeat.com/articles/articles/regeneration-degeneration.shtml",
    slug: "regeneration-degeneration",
  },
  {
    title: "Rosacea, inflammation, and aging: The inefficiency of stress",
    url: "http://raypeat.com/articles/articles/rosacea-inflammation-aging.shtml",
    slug: "rosacea-inflammation-aging",
  },
  {
    title: "RU486, Cancer, Estrogen, and Progesterone",
    url: "http://raypeat.com/articles/articles/ru486.shtml",
    slug: "ru486",
  },
  {
    title: "Salt, energy, metabolic rate, and longevity",
    url: "http://raypeat.com/articles/articles/salt.shtml",
    slug: "salt",
  },
  {
    title: "Serotonin, depression, and aggression: The problem of brain energy",
    url: "http://raypeat.com/articles/articles/serotonin-depression-aggression.shtml",
    slug: "serotonin-depression-aggression",
  },
  {
    title: "Serotonin: Effects in disease, aging and inflammation",
    url: "http://raypeat.com/articles/articles/serotonin-disease-aging-inflammation.shtml",
    slug: "serotonin-disease-aging-inflammation",
  },
  {
    title: "Stem cells, cell culture, and culture: Issues in regeneration",
    url: "http://raypeat.com/articles/articles/stemcells.shtml",
    slug: "stemcells",
  },
  {
    title: "Sugar issues",
    url: "http://raypeat.com/articles/articles/sugar-issues.shtml",
    slug: "sugar-issues",
  },
  {
    title: "Suitable Fats, Unsuitable Fats: Issues in Nutrition",
    url: "http://raypeat.com/articles/articles/unsuitablefats.shtml",
    slug: "unsuitablefats",
  },
  {
    title: "The Cancer Matrix",
    url: "http://raypeat.com/articles/articles/the-cancer-matrix.shtml",
    slug: "the-cancer-matrix",
  },
  {
    title: "The dark side of stress (learned helplesness)",
    url: "http://raypeat.com/articles/articles/dark-side-of-stress-learned-helplessness.shtml",
    slug: "dark-side-of-stress-learned-helplessness",
  },
  {
    title: "The Great Fish Oil Experiment",
    url: "http://raypeat.com/articles/articles/fishoil.shtml",
    slug: "fishoil",
  },
  {
    title: "The transparency of life: Cataracts as a model of age-related disease.",
    url: "http://raypeat.com/articles/aging/transparency-cataracts.shtml",
    slug: "transparency-cataracts",
  },
  {
    title: "Thyroid, insomnia, and the insanities: Commonalities in disease",
    url: "http://raypeat.com/articles/articles/thyroid-insanities.shtml",
    slug: "thyroid-insanities",
  },
  {
    title: "Thyroid: Therapies, Confusion, and Fraud.",
    url: "http://raypeat.com/articles/articles/thyroid.shtml",
    slug: "thyroid",
  },
  {
    title: "Tissue-bound estrogen in aging",
    url: "http://raypeat.com/articles/articles/tissue-bound-estrogen.shtml",
    slug: "tissue-bound-estrogen",
  },
  {
    title: "Tryptophan, serotonin, and aging.",
    url: "http://raypeat.com/articles/aging/tryptophan-serotonin-aging.shtml",
    slug: "tryptophan-serotonin-aging",
  },
  {
    title: "Unsaturated Vegetable Oils: Toxic.",
    url: "http://raypeat.com/articles/articles/unsaturated-oils.shtml",
    slug: "unsaturated-oils",
  },
  {
    title: "Vegetables, etc. - Who Defines Food?",
    url: "http://raypeat.com/articles/articles/vegetables.shtml",
    slug: "vegetables",
  },
  {
    title: "Vitamin E: Estrogen antagonist, energy promoter, and anti-inflammatory",
    url: "http://raypeat.com/articles/articles/vitamin-e.shtml",
    slug: "vitamin-e",
  },
  {
    title: "Water: swelling, tension, pain, fatigue, aging",
    url: "http://raypeat.com/articles/articles/water.shtml",
    slug: "water",
  },
  {
    title: "When energy fails: Edema, heart failure, hypertension, sarcopenia, etc.",
    url: "http://raypeat.com/articles/articles/edema-heart-failure-hypertension-sarcopenia.shtml",
    slug: "edema-heart-failure-hypertension-sarcopenia",
  },
  {
    title: "William Blake as biological visionary. Can art instruct science?",
    url: "http://raypeat.com/articles/articles/william-blake.shtml",
    slug: "william-blake",
  },
];

export default articles;
|
||||
177
raypeat-articles/process-and-cleanup.ts
Normal file
177
raypeat-articles/process-and-cleanup.ts
Normal file
@@ -0,0 +1,177 @@
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import articles from "./articles";
|
||||
import jsdom from "jsdom";
|
||||
|
||||
/** Directory, relative to the working directory, holding the raw scraped article HTML. */
const DUMPS_LOCATION = "article-dumps";

/** Directory, relative to the working directory, that receives the cleaned-up articles. */
const PROCESSED_LOCATION = "processed-articles";
|
||||
|
||||
/**
 * Downloads every article listed in `articles` and stores the inner HTML of
 * the first element with class `entries` as `<slug>.html` in DUMPS_LOCATION.
 *
 * Articles are processed concurrently and independently: a failed download
 * (logged with the prefix "e:") or a failed parse/write (logged with the
 * prefix "d:") only skips that article. The returned promise resolves once
 * every article has been either written or reported.
 */
export async function scrapeArticlesMainContent() {
  const dumpsDir = path.resolve(".", DUMPS_LOCATION);
  // writeFile does not create missing parent directories.
  await fs.mkdir(dumpsDir, { recursive: true });

  await Promise.all(
    articles.map(async (article) => {
      let html: string;
      try {
        const response = await fetch(article.url);
        // fetch() only rejects on network errors; HTTP error statuses must be
        // checked explicitly, otherwise an error page would be stored as the
        // article.
        if (!response.ok) {
          throw new Error(`HTTP ${response.status} ${response.statusText}`);
        }
        html = await response.text();
      } catch (e: unknown) {
        console.log("e:", article.url, e);
        return;
      }

      try {
        const dom = new jsdom.JSDOM(html);
        const mainContent =
          dom.window.document.body.getElementsByClassName("entries").item(0)?.innerHTML ?? "";
        // Awaited so that write errors land in this catch block and the
        // function does not resolve before the file is on disk.
        await fs.writeFile(path.join(dumpsDir, article.slug + ".html"), mainContent);
      } catch (e: unknown) {
        console.log("d:", article.url, e);
      }
    }),
  );
}
|
||||
|
||||
/**
 * Removes every comment node found anywhere below `node`.
 *
 * @param node Root of the subtree to strip; the root itself is never removed.
 */
function deleteChildCommentsRecursive(node: Node) {
  // `childNodes` is a live list: removing a child while iterating it directly
  // shifts the remaining children and skips the next sibling. Iterate over a
  // snapshot instead.
  for (const child of Array.from(node.childNodes)) {
    // comment node type === 8 === Node.COMMENT_NODE but not available here
    if (child.nodeType === 8) {
      child.remove();
    } else {
      deleteChildCommentsRecursive(child);
    }
  }
}
|
||||
|
||||
/**
 * Promotes an element whose markup contains the "A R T I C L E" marker to the
 * document heading.
 *
 * When the marker is present, the element is replaced by an `<h1>` holding
 * its markup with the first marker occurrence removed, and the document's
 * `<title>` is set to that markup, trimmed. An existing `<title>` is reused;
 * one is created and appended to `<head>` only if none exists, so repeated
 * calls never produce duplicate titles.
 *
 * @param doc  Document that owns `node`.
 * @param node Candidate node; nodes without `innerHTML` are ignored.
 * @returns `true` if the node was replaced (the caller should skip its
 *          default handling), `false` otherwise.
 */
function setAsTitleIfContainsArticle(doc: Document, node: Node) {
  const marker = "A R T I C L E";
  const el = node as HTMLSpanElement;
  if (!el.innerHTML?.includes(marker)) {
    return false;
  }

  const headingHtml = el.innerHTML.replace(marker, "");
  el.replaceWith(Object.assign(doc.createElement("h1"), { innerHTML: headingHtml }));

  let title = doc.head.querySelector("title");
  if (!title) {
    title = doc.createElement("title");
    doc.head.appendChild(title);
  }
  title.innerHTML = headingHtml.trim();
  return true;
}
|
||||
|
||||
/**
 * Describes how elements matching a selector are rewritten during cleanup.
 */
type ReplcementEntry = {
  /**
   * Tag name of the element that replaces the matched one. When `null` or
   * omitted, the matched element is unwrapped: its children are kept and the
   * element itself is dropped.
   */
  tag?: string | null;
  /** Properties copied onto the replacement node (e.g. `className`). */
  attrs?: Partial<Record<string, unknown>>;
  /**
   * Optional hook run before the default replacement. Returning `true` means
   * the hook handled the node and the default replacement is skipped.
   */
  extra?: (doc: Document, node: Node) => boolean;
};
|
||||
|
||||
/**
 * CSS selector -> rewrite rule used by `cleanupFile`.
 *
 * - `null`            : matching elements are removed together with their content.
 * - `{ tag: null }`   : matching elements are unwrapped (children kept).
 * - `{ tag: "name" }` : matching elements are re-created as `<name>`, keeping
 *                       their children and receiving `attrs`, if any.
 *
 * Selectors are applied in declaration order, so the order of the keys matters.
 */
const selectorReplacementMap: Record<string, ReplcementEntry | null> = {
  title: null,
  ul: { tag: null },
  i: { tag: "em" },
  font: { tag: null },
  p: { tag: "p" },
  div: { tag: null },
  img: null,
  br: null,
  wbr: null,
  b: { tag: "strong", extra: setAsTitleIfContainsArticle },
  center: null,
  hr: { tag: "hr" },
  table: null,
  "span.title": {
    tag: "header",
    attrs: { className: "title" },
    extra: setAsTitleIfContainsArticle,
  },
  "span.posted": {
    tag: "article",
    attrs: { className: "posted" },
  },
} as const;
|
||||
|
||||
/**
 * Calls `cb` for every text node below `root`, depth-first in document order.
 *
 * Each level is walked over a snapshot of its children, so `cb` may remove or
 * replace the node it receives without causing siblings to be skipped.
 *
 * @param doc  Document passed through to `cb` unchanged.
 * @param root Subtree to walk; `root` itself is not passed to `cb`.
 * @param cb   Invoked with `doc` and each text node found.
 */
function forEachTextNode(doc: Document, root: Node, cb: (doc: Document, node: Text) => void) {
  // `childNodes` is live; snapshot it so mutations made by `cb` cannot shift
  // the iteration.
  for (const child of Array.from(root.childNodes)) {
    // text node type === 3 === Node.TEXT_NODE but not available here
    if (child.nodeType === 3) {
      cb(doc, child as Text);
    } else {
      forEachTextNode(doc, child, cb);
    }
  }
}
|
||||
|
||||
/**
 * Cleans one scraped article and writes the result to PROCESSED_LOCATION
 * under the same file name.
 *
 * Steps, in order:
 * 1. apply every rule of `selectorReplacementMap` (remove, unwrap or re-tag
 *    matching elements);
 * 2. turn text nodes consisting only of `=` characters (plus whitespace) into
 *    `<hr>` and text nodes containing "REFERENCES" into an `<h3>`;
 * 3. strip all comment nodes;
 * 4. serialise the document and write it out.
 *
 * @param fileName Name of a file inside DUMPS_LOCATION.
 * @returns A promise that resolves once the processed file has been written;
 *          it rejects if reading, processing or writing fails.
 */
async function cleanupFile(fileName: string) {
  const sourcePath = path.resolve(".", DUMPS_LOCATION, fileName);
  const { window } = new jsdom.JSDOM(await fs.readFile(sourcePath, "utf8"));
  const document = window.document;

  for (const [selector, replacement] of Object.entries(selectorReplacementMap)) {
    for (const node of document.querySelectorAll(selector)) {
      if (!replacement) {
        node.remove();
        continue;
      }
      // The hook may fully handle the node itself (e.g. promote it to <h1>).
      if (replacement.extra?.(document, node)) {
        continue;
      }
      // Without a tag the children are moved into a fragment, which unwraps
      // the element when it is inserted.
      const newNode = replacement.tag
        ? document.createElement(replacement.tag)
        : document.createDocumentFragment();
      newNode.replaceChildren(...node.childNodes);
      Object.assign(newNode, replacement.attrs ?? {});
      node.replaceWith(newNode);
    }
  }

  forEachTextNode(document, document.documentElement, (doc: Document, node: Text) => {
    const text = node.textContent ?? "";
    // Anchored on both ends: only a text node that is nothing but a run of
    // "=" is a separator. Unanchored, any text merely containing "=" would be
    // replaced by a rule.
    if (/^\s*=+\s*$/.test(text)) {
      node.replaceWith(doc.createElement("hr"));
    } else if (text.includes("REFERENCES")) {
      // textContent rather than innerHTML: the source is plain text and must
      // not be re-parsed as markup.
      node.replaceWith(Object.assign(doc.createElement("h3"), { textContent: text.trim() }));
    }
  });

  deleteChildCommentsRecursive(document.documentElement);

  const outputDir = path.resolve(".", PROCESSED_LOCATION);
  // writeFile does not create missing parent directories.
  await fs.mkdir(outputDir, { recursive: true });
  // NOTE(review): the pattern below is kept exactly as found; it looks like a
  // mangled rendering of another character (possibly U+FFFD) — confirm the
  // intended character before relying on this replacement.
  await fs.writeFile(
    path.join(outputDir, fileName),
    document.documentElement.outerHTML.replaceAll(/<2F>/g, '"'),
  );
}
|
||||
|
||||
/**
 * Runs `cleanupFile` concurrently for every file in DUMPS_LOCATION.
 *
 * One file failing does not stop the others; each failure is reported on
 * stderr together with the file name. The returned promise rejects only if
 * the dump directory itself cannot be listed.
 */
async function cleanup() {
  const fileNames = await fs.readdir(path.resolve(".", DUMPS_LOCATION));
  const outcomes = await Promise.allSettled(fileNames.map((fileName) => cleanupFile(fileName)));

  outcomes.forEach((outcome, index) => {
    if (outcome.status === "rejected") {
      console.error(`cleanup failed for ${fileNames[index]}:`, outcome.reason);
    }
  });
}
|
||||
|
||||
// Script entry point. The promise is handled explicitly so that a failure
// (e.g. an unreadable dump directory) is reported and reflected in the exit
// code instead of surfacing as an unhandled rejection.
cleanup().catch((error: unknown) => {
  console.error("cleanup failed:", error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user