diff --git a/dub.sdl b/dub.sdl index e41ac72..694ff62 100644 --- a/dub.sdl +++ b/dub.sdl @@ -5,9 +5,8 @@ license "GPLv3" dependency "dxml" version="~>0.4.1" dependency "htmld" version="~>0.3.7" -dependency "pegged" version=">=0.0.0" -dependency "sumtype" version="~>0.8.13" dependency "requests" version="~>1.0.9" dependency "vibe-d:web" version=">0.8.5" - -subConfigurations "requests" "vibed" +dependency "sumtype" version="~>0.8.13" +dependency "d_rss" version="~>0.1.4" +dependency "pegged" version=">=0.0.0" diff --git a/source/app.d b/source/app.d index d3f476e..626c29f 100644 --- a/source/app.d +++ b/source/app.d @@ -140,6 +140,8 @@ void main(string[] args) { + setLogLevel(LogLevel.debug_); + // CLI arguments bool daemon = false; EndpointType endpoint = EndpointType.html; diff --git a/source/cartastraccia/actor.d b/source/cartastraccia/actor.d index c7f565e..5766b2f 100644 --- a/source/cartastraccia/actor.d +++ b/source/cartastraccia/actor.d @@ -42,7 +42,8 @@ req.keepAlive = false; req.timeout = ACTOR_REQ_TIMEOUT; auto res = req.get(path); - parseRSS(rss, cast(immutable string)res.responseBody.data); + string tmp = res.responseBody.data.to!string; + parseRSS(rss, tmp); } catch (Exception e) { diff --git a/source/cartastraccia/rss.d b/source/cartastraccia/rss.d index 14e52d6..6f41125 100644 --- a/source/cartastraccia/rss.d +++ b/source/cartastraccia/rss.d @@ -4,13 +4,14 @@ import vibe.core.log; import vibe.http.server : render; -import dxml.parser; import sumtype; +import mrss; -import std.algorithm : startsWith, sort; +import std.algorithm : startsWith, sort, move; import std.datetime; import std.range; import std.conv : to; +import std.string; public: @@ -59,7 +60,6 @@ // optional elements string language; string copyright; - string managingEditor; string webMaster; string pubDate; string lastBuildDate; @@ -124,26 +124,29 @@ } else logFatal("Invalid data format received from webserver."); } -/** - * Entrypoint for parsing a rss feed (repsesented as string) -*/ -void parseRSS(ref RSS rss, immutable string feed) @trusted +void parseRSS(ref RSS rss, string feed) @trusted { - auto rssRange = parseXML!simpleXML(feed); + mrss_t* rssData; + mrss_error_t err = mrss_parse_buffer(feed.toZString, feed.length, &rssData); - if(rssRange.front.name == "html") { - rss = InvalidRSS("html", ""); - return; + if(err) { + rss = InvalidRSS("mrss", ""); + + } else { + rss = ValidRSS(); + + rss.tryMatch!( + (ref ValidRSS vrss) { + newChannel(rssData, vrss); + + mrss_item_t* item = rssData.item; + while(item) { + newItem(item, vrss); + item = rssData.item; + } + }); } - while(rssRange.front.type != EntityType.text && rssRange.front.name != "channel") { - rssRange.popFront(); - } - rssRange.popFront(); - - alias C = typeof(rssRange); - insertElement!(RSSChannel, RSS, C)(rss, rss, rssRange); - // parse date and sort in descending order (newest first) rss.tryMatch!( (ref InvalidRSS i) { @@ -157,119 +160,42 @@ }); } - private: -/** - * Insert an element (RSSChannel or RSSItem) which has: - * - A parent (be it the RSS xml root (RSSChannel - * or the RSSChannel in case of an RSSItem - * - Various sub-entries which are processed sequentially - * by advancing rssRange -*/ -void insertElement(ElementType, Parent, C)( - ref RSS rss, ref Parent parent, ref C rssRange) @trusted +void newChannel(mrss_t* rssData, ref ValidRSS rss) { - ElementType newElement; - - mixin(selectElementName); - - // advance the parser to completion, entry by entry - while(rssRange.front.type != EntityType.elementEnd - && rssRange.front.type != EntityType.text - && rssRange.front.name != elname) { - - immutable name = rssRange.front.name; - rssRange.popFront(); - - if(name == "item") { - - // recursively insert items - static if(is(ElementType == RSSChannel)) { - insertElement!(RSSItem, RSSChannel, C)(rss, newElement, rssRange); - } else { - rss = InvalidRSS(name, ""); - } - - } else if(name == "image" || name == "media:content") { - // skip images - while(rssRange.front.type != EntityType.elementEnd - && rssRange.front.name != name) { - rssRange.popFront(); // elementStart - rssRange.popFront(); // text - rssRange.popFront(); // elementEnd - } - - } else if(rssRange.front.type == EntityType.text - || rssRange.front.type == EntityType.cdata) { - - // found a valid text field - immutable content = rssRange.front.text; - rssRange.popFront(); - - fill: switch(name) { - - default: - // we don't care about entries which are not attributes of RSSChannel - logDebug("Ignoring XML Entity: " ~ name); - break fill; - - // inserting a channel - static if(is(ElementType == RSSChannel)) { - static foreach(m; __traits(allMembers, RSSChannel)) { - static if(m != "items") { - case m: - mixin("newElement."~m~" = content;"); - break fill; - } - } - - // inserting an item - } else if(is(ElementType == RSSItem)) { - static foreach(m; __traits(allMembers, RSSItem)) { - case m: - mixin("newElement."~m~" = content;"); - break fill; - } - - // should not get here (means function invocation was invalid) - } else assert(false, "Invalid ElementType requested"); - } + static foreach(m; __traits(allMembers, RSSChannel)) { + static if(is(typeof(__traits(getMember, mrss_t, m)) == char[])) { + mixin("rss.channel."~m~" = rssData."~m~".ZtoString.idup;"); } - // skip elementEnd - rssRange.popFront(); } - - // finished channel / item parsing. Insert it into rss struct - rss.match!( - (ref InvalidRSS i) { - logWarn("Invalid XML Entity detected: " - ~ i.element - ~ ": " - ~ i.content); - }, - (ref FailedRSS f) {}, - (ref ValidRSS v) { - static if(is(ElementType == RSSChannel)) - parent.tryMatch!( - (ref ValidRSS v) { - v.channel = newElement; - }); - else if(is(ElementType == RSSItem)) - parent.items ~= newElement; - logInfo("Inserted " ~ elname ~ ": " ~ newElement.title); - }); } -static immutable string selectElementName = " - string elname; +void newItem(mrss_item_t* rssItem, ref ValidRSS rss) +{ + RSSItem newItem; - static if(is(ElementType == RSSChannel)) { - elname = \"channel\"; - static assert(is(Parent == RSS)); - } else if(is(ElementType == RSSItem)) { - elname = \"item\"; - static assert(is(Parent == RSSChannel)); - } else assert(false, \"Invalid ElementType provided\"); -"; + static foreach(m; __traits(allMembers, RSSItem)) { + static if(is(typeof(__traits(getMember, mrss_t, m)) == char[])) { + mixin("newItem."~m~" = rssItem."~m~".ZtoString.idup;"); + } + } + rss.channel.items ~= newItem; +} + +string ZtoString(const char* c) +{ + if (c !is null) + return to!string(fromStringz(c)); + else + return null; +} + +char* toZString(string s) +{ + char[] ret=cast(char[])s; + if (ret[$-1]!='\0') + ret~="\0"; + return ret.ptr; +}