Mercurial Hosting > shareasale
diff data/build.luan @ 1:bd2abcd7190a
mostly done
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 20 Sep 2022 19:40:39 -0600 |
parents | |
children | |
line wrap: on
line diff
-- data/build.luan
--
-- Build script.  Walks the directories under "file:pages", parses every file
-- found there as HTML, cuts out each <div userid=...> fragment and saves one
-- Db document per user id that is not already stored.

local Luan = require "luan:Luan.luan"
local error = Luan.error
local ipairs = Luan.ipairs or error()
local type = Luan.type or error()
local stringify = Luan.stringify or error()
local Io = require "luan:Io.luan"
local uri = Io.uri or error()
local print = Io.print or error()
local String = require "luan:String.luan"
local starts_with = String.starts_with or error()
local Html = require "luan:Html.luan"
local html_parse = Html.parse or error()
local Table = require "luan:Table.luan"
local copy = Table.copy or error()

-- Handler for the "site:" scheme: maps "site:<path>" onto "file:../src<path>".
-- Presumably this is what lets the "site:" require just below resolve when the
-- script is run from the data directory -- TODO confirm.
function Io.schemes.site(path)
	return uri( "file:../src"..path )
end

local Db = require "site:/lib/Db.luan"

-- NOTE(review): presumably empties the database so every run rebuilds it from
-- scratch -- confirm against lib/Db.luan.
Db.delete_all()


-- Splits a parsed HTML list into per-user fragments.
--
-- html: the list returned by html_parse; elements that are tables with
--       type=="tag" are treated as tags, everything else is skipped over.
--
-- Returns a list of sub-lists of `html`.  Each sub-list starts at a "div" tag
-- that carries a `userid` attribute and runs through the "/div" tag that
-- brings the div nesting count back to zero.  If the input ends before that
-- div is balanced, the fragment runs to the end of `html`.
local function get_user_htmls(html)
	local rtn = {}
	local n = #html
	local i = 1
	local el
	while true do
		-- Advance to the next <div userid=...>; finished when input is exhausted.
		while true do
			if i > n then
				return rtn
			end
			el = html[i]
			if type(el)=="table" and el.type=="tag" and el.name=="div" and el.attributes.userid~=nil then
				break
			end
			i = i + 1
		end
		local start = i
		local nesting = 0
		-- Consume elements until the opening div is balanced.  The first pass
		-- sees the opening div itself, so nesting becomes 1 before any "/div"
		-- can bring it back to 0.
		repeat
			if type(el)=="table" and el.type=="tag" then
				if el.name=="div" then
					nesting = nesting + 1
				elseif el.name=="/div" then
					nesting = nesting - 1
				end
			end
			i = i + 1
			el = html[i]
		-- `i > n` guards against an unbalanced div: without it, running off the
		-- end of the list leaves el nil and nesting unchanged, so the loop
		-- would never terminate on a truncated or malformed page.
		until nesting==0 or i > n
		rtn[#rtn+1] = copy(html,start,i-1)
	end
end

-- Builds a document from one user fragment and saves it through Db.save.
--
-- html: one fragment from get_user_htmls; html[1] must be a tag with a
--       `userid` attribute (error() is raised when the attribute is missing).
--
-- Does nothing when Db.count reports an existing document for that user id.
-- Fields set on the document, when the matching tags are present:
--   user_id     - the userid attribute of html[1]
--   user_name   - element following <span class="organization">, or "" when
--                 that element is the closing "/span" tag
--   category    - list of the elements following each <a> whose class starts
--                 with "afftag "
--   description - element following <section class="description">
--   websites    - list of href attributes of each <a class="aff-website">
local function process_user_html(html)
--print(stringify(html[1]))
	local user_id = html[1].attributes.userid or error()
	if Db.count("user_id:"..user_id) > 0 then
		return
	end
	local doc = {}
	doc.user_id = user_id
	for i, el in ipairs(html) do
		-- Only tag elements are of interest.
		if not (type(el)=="table" and el.type=="tag") then
			continue
		end
		local name = el.name
		local attributes = el.attributes
		local class = attributes.class
		if name=="span" and class=="organization" then
			local user_name = html[i+1]
			-- An empty span is followed directly by its closing tag.
			if type(user_name)=="table" and user_name.type=="tag" and user_name.name=="/span" then
				user_name = ""
			end
			doc.user_name = user_name
		elseif name=="a" and class~=nil and starts_with(class,"afftag ") then
			doc.category = doc.category or {}
			doc.category[#doc.category+1] = html[i+1]
		elseif name=="section" and class=="description" then
			doc.description = html[i+1]
		elseif name=="a" and class=="aff-website" then
			doc.websites = doc.websites or {}
			doc.websites[#doc.websites+1] = attributes.href
		end
	end
	Db.save(doc)
end

local pages_dir = uri "file:pages"

-- Every non-hidden child of pages_dir is treated as a directory of page files.
for _, dir in ipairs(pages_dir.children()) do
	if starts_with( dir.name(), "." ) then
		continue
	end
	--print(dir.name())
	for _, file in ipairs(dir.children()) do
		local text = file.read_text()
		local html = html_parse(text)
		local user_htmls = get_user_htmls(html)
		-- Progress line: file name and number of user fragments found in it.
		print(file.name().." "..#user_htmls)
		for _, user_html in ipairs(user_htmls) do
			process_user_html(user_html)
		end
		--break
	end
end