add text section

This commit is contained in:
sam 2024-03-10 02:37:07 +13:00
parent 1bd53b8907
commit fcbe2db038
2 changed files with 12 additions and 10 deletions

View file

@@ -23,13 +23,15 @@ async function crawl(url, ignoreExisting = false) {
console.log(`[${res.status} ${res.statusText}] ${url}`); console.log(`[${res.status} ${res.statusText}] ${url}`);
if(res.status == 200) { if(res.status == 200) {
const $ = cheerio.load(await res.text()); const html = await res.text();
const $ = cheerio.load(html);
const title = $("title").text(); const title = $("title").text();
const text = $.text().replace(/\s+/g, " ").trim();
console.log(`[TITLE] ${title} (${url})`); console.log(`[TITLE] ${title} (${url})`);
db.run(`INSERT INTO pages(url, title) db.run(`INSERT INTO pages(url, title, text)
VALUES(?, ?)`, url, title, (err) => { VALUES(?, ?, ?)`, url, title, text, (err) => {
if(err) { if(err) {
console.log(`[INSERT FAIL: ${err}] ${url}|${title}`); console.log(`[INSERT FAIL: ${err}] ${url}|${title}`);
} else { } else {

12
db.js
View file

@@ -3,20 +3,20 @@ const db = new sqlite3.Database('index.db');
db.createTables = function() { db.createTables = function() {
this.exec(` this.exec(`
CREATE TABLE IF NOT EXISTS pages(id INTEGER PRIMARY KEY, url TEXT NOT NULL UNIQUE, title TEXT); CREATE TABLE IF NOT EXISTS pages(id INTEGER PRIMARY KEY, url TEXT NOT NULL UNIQUE, title TEXT, text TEXT);
CREATE VIRTUAL TABLE IF NOT EXISTS page_search USING fts5(url, title, content=pages, content_rowid=id); CREATE VIRTUAL TABLE IF NOT EXISTS page_search USING fts5(url, title, text, content=pages, content_rowid=id);
CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN CREATE TRIGGER IF NOT EXISTS pages_ai AFTER INSERT ON pages BEGIN
INSERT INTO page_search(rowid, url, title) VALUES (new.id, new.url, new.title); INSERT INTO page_search(rowid, url, title, text) VALUES (new.id, new.url, new.title, new.text);
END; END;
CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN CREATE TRIGGER IF NOT EXISTS pages_ad AFTER DELETE ON pages BEGIN
INSERT INTO page_search(page_search, rowid, url, title) VALUES ('delete', old.id, old.url, old.title); INSERT INTO page_search(page_search, rowid, url, title, text) VALUES ('delete', old.id, old.url, old.title, old.text);
END; END;
CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN CREATE TRIGGER IF NOT EXISTS pages_au AFTER UPDATE ON pages BEGIN
INSERT INTO page_search(page_search, rowid, url, title) VALUES ('delete', old.id, old.url, old.title); INSERT INTO page_search(page_search, rowid, url, title, text) VALUES ('delete', old.id, old.url, old.title, old.text);
INSERT INTO page_search(rowid, url, title) VALUES (new.id, new.url, new.title); INSERT INTO page_search(rowid, url, title, text) VALUES (new.id, new.url, new.title, new.text);
END; END;
`); `);
console.log("[SQL] Tables created."); console.log("[SQL] Tables created.");