Loading src/html.cpp +132 −101 Original line number Diff line number Diff line Loading @@ -233,8 +233,10 @@ libhtmlpp::Element &libhtmlpp::HtmlString::parse() { return *_rootEl; } void libhtmlpp::HtmlString::_buildtreenode(DocElements *firstel, libhtmlpp::DocElements *lastel) void libhtmlpp::HtmlString::_buildtreenode( DocElements *firstel, libhtmlpp::DocElements *lastel, std::unique_ptr<Element> &html) { if (!firstel || !lastel) { HTMLException excp; Loading @@ -242,125 +244,156 @@ void libhtmlpp::HtmlString::_buildtreenode(DocElements *firstel, throw excp; } struct cpyel { cpyel() : start(nullptr), end(nullptr) {} cpyel(const cpyel &src) : start(src.start), end(src.end) {} libhtmlpp::DocElements *start; libhtmlpp::DocElements *end; struct Frame { DocElements *open; // opening tag holder DocElements *close; // its matching terminator const DocElements *outer_end; // end bound of the parent span }; std::stack<Frame> stack; std::stack<cpyel> cpylist; const DocElements *prev=nullptr; DocElements *start = firstel; DocElements *next=firstel->nextel.get(); const DocElements *end = lastel; Element *prev_el = nullptr; // last attached Element in the current span auto skip_empty = [&](DocElements* cur, const DocElements* stop) -> DocElements* { auto skip_empty = [](DocElements *cur, const DocElements *stop) -> DocElements* { while (cur && cur != stop && (!cur->element)) { cur = cur->nextel.get(); } return cur; }; auto checkterminator = [&](DocElements* termel, const DocElements* end) -> DocElements* { if (!termel || !termel->element || termel->terminator || termel->element->getType() != HtmlEl) auto find_terminator = [&skip_empty](DocElements *open, const DocElements *bound) -> DocElements* { if (!open || !open->element || open->terminator || open->element->getType() != HtmlEl) return nullptr; const std::string &tagname = dynamic_cast<HtmlElement*>(termel->element.get())->getTagname(); int nesting_level = 0; const std::string &tag = dynamic_cast<HtmlElement*>(open->element.get())->getTagname(); for (DocElements* cur = skip_empty(termel->nextel.get(), end); cur && cur != end; cur = skip_empty(cur->nextel.get(), end)) { if (!cur->element) continue; int nest = 0; for (DocElements *cur = skip_empty(open->nextel.get(), bound); cur; cur = skip_empty(cur->nextel.get(), bound)) { if (cur->element->getType() == HtmlEl) { if (cur->element && cur->element->getType() == HtmlEl) { const std::string &curtag = dynamic_cast<HtmlElement*>(cur->element.get())->getTagname(); static_cast<HtmlElement*>(cur->element.get())->getTagname(); if (curtag == tagname) { if (curtag == tag) { if (cur->terminator) { if (nesting_level == 0) return cur; --nesting_level; if (nest == 0) return cur; --nest; } else { ++nesting_level; ++nest; } } } if (cur == bound) return nullptr; } // require explicit terminator for container tags // Require explicit terminator for container types for (size_t i = 0; ContainerTypes[i]; ++i) { if (tagname == ContainerTypes[i]) { if (tag == ContainerTypes[i]) { HTMLException e; e[HTMLException::Error] << tagname << " must be terminated ! " << dynamic_cast<HtmlElement*>(termel->element.get()) ->getAtributte("id"); e[HTMLException::Error] << tag << " must be terminated ! " << dynamic_cast<HtmlElement*>(open->element.get())->getAtributte("id"); throw e; } } return nullptr; }; size_t counter =0; for (;;) { start = skip_empty(start, end); NEXTDOCEL: if(!start->element){ HTMLException e; e[HTMLException::Error] << "start not found this shouldn't happend in line: " << counter << "!"; throw e; // finished current span? if (!start) { html = std::move(firstel->element); return; } ++counter; // hit this span's terminator -> restore parent scope if (start == end) { if (stack.empty()) { html = std::move(firstel->element); return; } Frame fr = stack.top(); stack.pop(); // continue after </open> in outer scope prev_el = nullptr; start = fr.close->nextel.get(); end = fr.outer_end; continue; } DocElements *parent=checkterminator(start,end); // If this is an opener with a terminator inside [start, end) if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { auto *hel = static_cast<HtmlElement*>(start->element.get()); if(prev && start->element && !start->terminator){ start->element->_prevElement=prev->element.get(); // Attach first child if present and not already attached if (!hel->_childElement && start->nextel && start->nextel->element) { std::cout << hel->getTagname() << std::endl; hel->_childElement = std::move(start->nextel->element); } if(parent && parent!=next){ ((HtmlElement*)(start->element.get()))->_childElement=std::move(next->element); cpyel childel; childel.start=next->nextel.get(); childel.end=parent; cpylist.push(childel); next=parent->nextel.get(); } // If we have a first child, we will walk/link the whole child span now if (hel->_childElement) { // Push frame to restore after children stack.push(Frame{start, close, end}); while(next && next!=end){ if(!next->terminator){ start->element->_nextElement=std::move(next->element); prev=start; start=next->nextel.get(); if(start->nextel) next=start->nextel.get(); else next=nullptr; goto NEXTDOCEL; } next=next->nextel.get(); }; // Inside child span, prev_el is the first child's Element prev_el = hel->_childElement.get(); // Walk from the physical node after the (now moved-from) first-child holder DocElements *cur = skip_empty(start->nextel.get(), close); if(!cpylist.empty()){ cpyel childel(cpylist.top()); // The first physical node after opener is now moved-from; advance one more if (cur && !cur->element) cur = skip_empty(cur->nextel.get(), close); std::cerr << childel.start->element->getType() << std::endl; // Link subsequent siblings up to (but not including) the close tag while (cur && cur != close) { // Only link non-terminator, real elements if (cur->element && !cur->terminator) { // double-link: sibling prev cur->element->_prevElement = prev_el; prev=nullptr; start=childel.start; end=childel.end; next=childel.start->nextel.get(); cpylist.pop(); goto NEXTDOCEL; // move current element into prev_el->_nextElement prev_el->_nextElement = std::move(cur->element); // advance prev_el to the element we just attached prev_el = prev_el->_nextElement.get(); } cur = skip_empty(cur->nextel.get(), close); } // dive into the children recursively by setting the current span to (open->next, close) prev_el = nullptr; start = skip_empty(start->nextel.get(), close); end = close; continue; } // No first child; fall through to normal forward progress. } } // Normal forward progress (non-container or no terminator found) if (start->element && !start->terminator) { // Chain siblings in the flat list so callers that inspect next links still see sequence if (prev_el) { start->element->_prevElement = prev_el; } if (start->nextel && start->nextel->element) { start->element->_nextElement = std::move(start->nextel->element); } prev_el = start->element.get(); // Skip over the (now moved-from) nextel holder if we consumed it start = start->nextel ? start->nextel->nextel.get() : start->nextel.get(); } else { // terminator or moved-from holder; just advance start = start->nextel.get(); } } } Loading Loading @@ -512,9 +545,7 @@ void libhtmlpp::HtmlString::_buildTree() { } } _buildtreenode(firstEl.get(),lastEl); _rootEl=std::move(firstEl->element); _buildtreenode(firstEl.get(),lastEl,_rootEl); HtmlString test; print(*_rootEl,test,true); Loading src/html.h +1 −1 Original line number Diff line number Diff line Loading @@ -290,7 +290,7 @@ namespace libhtmlpp { std::unique_ptr<Element> _rootEl; void _serialelize(std::vector<char> in, HtmlElement* out); void _buildTree(); void _buildtreenode(DocElements *firstel,DocElements *lastel); void _buildtreenode(DocElements *firstel,DocElements *lastel,std::unique_ptr<Element>&html); std::vector<char> _Data; std::string _Str; friend void HtmlEncode(const std::string &input,HtmlString *output); Loading Loading
src/html.cpp +132 −101 Original line number Diff line number Diff line Loading @@ -233,8 +233,10 @@ libhtmlpp::Element &libhtmlpp::HtmlString::parse() { return *_rootEl; } void libhtmlpp::HtmlString::_buildtreenode(DocElements *firstel, libhtmlpp::DocElements *lastel) void libhtmlpp::HtmlString::_buildtreenode( DocElements *firstel, libhtmlpp::DocElements *lastel, std::unique_ptr<Element> &html) { if (!firstel || !lastel) { HTMLException excp; Loading @@ -242,125 +244,156 @@ void libhtmlpp::HtmlString::_buildtreenode(DocElements *firstel, throw excp; } struct cpyel { cpyel() : start(nullptr), end(nullptr) {} cpyel(const cpyel &src) : start(src.start), end(src.end) {} libhtmlpp::DocElements *start; libhtmlpp::DocElements *end; struct Frame { DocElements *open; // opening tag holder DocElements *close; // its matching terminator const DocElements *outer_end; // end bound of the parent span }; std::stack<Frame> stack; std::stack<cpyel> cpylist; const DocElements *prev=nullptr; DocElements *start = firstel; DocElements *next=firstel->nextel.get(); const DocElements *end = lastel; Element *prev_el = nullptr; // last attached Element in the current span auto skip_empty = [&](DocElements* cur, const DocElements* stop) -> DocElements* { auto skip_empty = [](DocElements *cur, const DocElements *stop) -> DocElements* { while (cur && cur != stop && (!cur->element)) { cur = cur->nextel.get(); } return cur; }; auto checkterminator = [&](DocElements* termel, const DocElements* end) -> DocElements* { if (!termel || !termel->element || termel->terminator || termel->element->getType() != HtmlEl) auto find_terminator = [&skip_empty](DocElements *open, const DocElements *bound) -> DocElements* { if (!open || !open->element || open->terminator || open->element->getType() != HtmlEl) return nullptr; const std::string &tagname = dynamic_cast<HtmlElement*>(termel->element.get())->getTagname(); int nesting_level = 0; const std::string &tag = dynamic_cast<HtmlElement*>(open->element.get())->getTagname(); for (DocElements* cur = skip_empty(termel->nextel.get(), end); cur && cur != end; cur = skip_empty(cur->nextel.get(), end)) { if (!cur->element) continue; int nest = 0; for (DocElements *cur = skip_empty(open->nextel.get(), bound); cur; cur = skip_empty(cur->nextel.get(), bound)) { if (cur->element->getType() == HtmlEl) { if (cur->element && cur->element->getType() == HtmlEl) { const std::string &curtag = dynamic_cast<HtmlElement*>(cur->element.get())->getTagname(); static_cast<HtmlElement*>(cur->element.get())->getTagname(); if (curtag == tagname) { if (curtag == tag) { if (cur->terminator) { if (nesting_level == 0) return cur; --nesting_level; if (nest == 0) return cur; --nest; } else { ++nesting_level; ++nest; } } } if (cur == bound) return nullptr; } // require explicit terminator for container tags // Require explicit terminator for container types for (size_t i = 0; ContainerTypes[i]; ++i) { if (tagname == ContainerTypes[i]) { if (tag == ContainerTypes[i]) { HTMLException e; e[HTMLException::Error] << tagname << " must be terminated ! " << dynamic_cast<HtmlElement*>(termel->element.get()) ->getAtributte("id"); e[HTMLException::Error] << tag << " must be terminated ! " << dynamic_cast<HtmlElement*>(open->element.get())->getAtributte("id"); throw e; } } return nullptr; }; size_t counter =0; for (;;) { start = skip_empty(start, end); NEXTDOCEL: if(!start->element){ HTMLException e; e[HTMLException::Error] << "start not found this shouldn't happend in line: " << counter << "!"; throw e; // finished current span? if (!start) { html = std::move(firstel->element); return; } ++counter; // hit this span's terminator -> restore parent scope if (start == end) { if (stack.empty()) { html = std::move(firstel->element); return; } Frame fr = stack.top(); stack.pop(); // continue after </open> in outer scope prev_el = nullptr; start = fr.close->nextel.get(); end = fr.outer_end; continue; } DocElements *parent=checkterminator(start,end); // If this is an opener with a terminator inside [start, end) if (start->element && !start->terminator && start->element->getType() == HtmlEl) { if (DocElements *close = find_terminator(start, end)) { auto *hel = static_cast<HtmlElement*>(start->element.get()); if(prev && start->element && !start->terminator){ start->element->_prevElement=prev->element.get(); // Attach first child if present and not already attached if (!hel->_childElement && start->nextel && start->nextel->element) { std::cout << hel->getTagname() << std::endl; hel->_childElement = std::move(start->nextel->element); } if(parent && parent!=next){ ((HtmlElement*)(start->element.get()))->_childElement=std::move(next->element); cpyel childel; childel.start=next->nextel.get(); childel.end=parent; cpylist.push(childel); next=parent->nextel.get(); } // If we have a first child, we will walk/link the whole child span now if (hel->_childElement) { // Push frame to restore after children stack.push(Frame{start, close, end}); while(next && next!=end){ if(!next->terminator){ start->element->_nextElement=std::move(next->element); prev=start; start=next->nextel.get(); if(start->nextel) next=start->nextel.get(); else next=nullptr; goto NEXTDOCEL; } next=next->nextel.get(); }; // Inside child span, prev_el is the first child's Element prev_el = hel->_childElement.get(); // Walk from the physical node after the (now moved-from) first-child holder DocElements *cur = skip_empty(start->nextel.get(), close); if(!cpylist.empty()){ cpyel childel(cpylist.top()); // The first physical node after opener is now moved-from; advance one more if (cur && !cur->element) cur = skip_empty(cur->nextel.get(), close); std::cerr << childel.start->element->getType() << std::endl; // Link subsequent siblings up to (but not including) the close tag while (cur && cur != close) { // Only link non-terminator, real elements if (cur->element && !cur->terminator) { // double-link: sibling prev cur->element->_prevElement = prev_el; prev=nullptr; start=childel.start; end=childel.end; next=childel.start->nextel.get(); cpylist.pop(); goto NEXTDOCEL; // move current element into prev_el->_nextElement prev_el->_nextElement = std::move(cur->element); // advance prev_el to the element we just attached prev_el = prev_el->_nextElement.get(); } cur = skip_empty(cur->nextel.get(), close); } // dive into the children recursively by setting the current span to (open->next, close) prev_el = nullptr; start = skip_empty(start->nextel.get(), close); end = close; continue; } // No first child; fall through to normal forward progress. } } // Normal forward progress (non-container or no terminator found) if (start->element && !start->terminator) { // Chain siblings in the flat list so callers that inspect next links still see sequence if (prev_el) { start->element->_prevElement = prev_el; } if (start->nextel && start->nextel->element) { start->element->_nextElement = std::move(start->nextel->element); } prev_el = start->element.get(); // Skip over the (now moved-from) nextel holder if we consumed it start = start->nextel ? start->nextel->nextel.get() : start->nextel.get(); } else { // terminator or moved-from holder; just advance start = start->nextel.get(); } } } Loading Loading @@ -512,9 +545,7 @@ void libhtmlpp::HtmlString::_buildTree() { } } _buildtreenode(firstEl.get(),lastEl); _rootEl=std::move(firstEl->element); _buildtreenode(firstEl.get(),lastEl,_rootEl); HtmlString test; print(*_rootEl,test,true); Loading
src/html.h +1 −1 Original line number Diff line number Diff line Loading @@ -290,7 +290,7 @@ namespace libhtmlpp { std::unique_ptr<Element> _rootEl; void _serialelize(std::vector<char> in, HtmlElement* out); void _buildTree(); void _buildtreenode(DocElements *firstel,DocElements *lastel); void _buildtreenode(DocElements *firstel,DocElements *lastel,std::unique_ptr<Element>&html); std::vector<char> _Data; std::string _Str; friend void HtmlEncode(const std::string &input,HtmlString *output); Loading