Add text column to pages table and full-text search index

This commit is contained in:
sam 2024-03-10 02:37:07 +13:00
parent 1bd53b8907
commit fcbe2db038
2 changed files with 12 additions and 10 deletions

View file

@ -23,13 +23,15 @@ async function crawl(url, ignoreExisting = false) {
console.log(`[${res.status} ${res.statusText}] ${url}`);
if(res.status == 200) {
const $ = cheerio.load(await res.text());
const html = await res.text();
const $ = cheerio.load(html);
const title = $("title").text();
const text = $.text().replace(/\s+/g, " ").trim();
console.log(`[TITLE] ${title} (${url})`);
db.run(`INSERT INTO pages(url, title)
VALUES(?, ?)`, url, title, (err) => {
db.run(`INSERT INTO pages(url, title, text)
VALUES(?, ?, ?)`, url, title, text, (err) => {
if(err) {
console.log(`[INSERT FAIL: ${err}] ${url}|${title}`);
} else {

12
db.js
View file

@ -3,20 +3,20 @@ const db = new sqlite3.Database('index.db');
db.createTables = function() {
this.exec(`
CREATE TABLE IF NOT EXISTS pages(id INTEGER PRIMARY KEY, url TEXT NOT NULL UNIQUE, title TEXT);
CREATE VIRTUAL TABLE IF NOT EXISTS page_search USING fts5(url, title, content=pages, content_rowid=id);
CREATE TABLE IF NOT EXISTS pages(id INTEGER PRIMARY KEY, url TEXT NOT NULL UNIQUE, title TEXT, text TEXT);
CREATE VIRTUAL TABLE IF NOT EXISTS page_search USING fts5(url, title, text, content=pages, content_rowid=id);
CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
INSERT INTO page_search(rowid, url, title) VALUES (new.id, new.url, new.title);
INSERT INTO page_search(rowid, url, title, text) VALUES (new.id, new.url, new.title, new.text);
END;
CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
INSERT INTO page_search(page_search, rowid, url, title) VALUES ('delete', old.id, old.url, old.title);
INSERT INTO page_search(page_search, rowid, url, title, text) VALUES ('delete', old.id, old.url, old.title, old.text);
END;
CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
INSERT INTO page_search(page_search, rowid, url, title) VALUES ('delete', old.id, old.url, old.title);
INSERT INTO page_search(rowid, url, title) VALUES (new.id, new.url, new.title);
INSERT INTO page_search(page_search, rowid, url, title, text) VALUES ('delete', old.id, old.url, old.title, old.text);
INSERT INTO page_search(rowid, url, title, text) VALUES (new.id, new.url, new.title, new.text);
END;
`);
console.log("[SQL] Tables created.");