diff --git a/dub.sdl b/dub.sdl new file mode 100644 index 0000000..8ca98a4 --- /dev/null +++ b/dub.sdl @@ -0,0 +1,9 @@ +name "cartastraccia" +description "A RSS feed reader" +authors "fra" +copyright "Copyright © 2019, fra" +license "GPLv3" +dependency "vibe-http" path="/home/fra/_progs/saoc/vibe-http" +dependency "sumtype" version="~>0.8.13" +dependency "pegged" version="*" +dependency "std-experimental-xml" version="~>0.1.7" diff --git a/example.xml b/example.xml new file mode 100644 index 0000000..1d2b06a --- /dev/null +++ b/example.xml @@ -0,0 +1,41 @@ + + + + Liftoff News + http://liftoff.msfc.nasa.gov/ + Liftoff to Space Exploration. + en-us + Tue, 10 Jun 2003 04:00:00 GMT + Tue, 10 Jun 2003 09:41:01 GMT + http://blogs.law.harvard.edu/tech/rss + Weblog Editor 2.0 + editor@example.com + webmaster@example.com + + Star City + http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp + How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>. + Tue, 03 Jun 2003 09:39:21 GMT + http://liftoff.msfc.nasa.gov/2003/06/03.html#item573 + + + Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st. + Fri, 30 May 2003 11:06:42 GMT + http://liftoff.msfc.nasa.gov/2003/05/30.html#item572 + + + The Engine That Does More + http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp + Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that. 
+ Tue, 27 May 2003 08:37:32 GMT
+ http://liftoff.msfc.nasa.gov/2003/05/27.html#item571
+
+
+ Astronauts' Dirty Laundry
+ http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp
+ Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.
+ Tue, 20 May 2003 08:56:02 GMT
+ http://liftoff.msfc.nasa.gov/2003/05/20.html#item570
+
+
+
diff --git a/feeds.conf b/feeds.conf
new file mode 100644
index 0000000..0ca4786
--- /dev/null
+++ b/feeds.conf
@@ -0,0 +1,2 @@
+Stallman 3h https://stallman.org/rss/rss.xml
+Lobsters 3h https://lobste.rs/rss
diff --git a/source/app.d b/source/app.d
new file mode 100644
index 0000000..e0feb8e
--- /dev/null
+++ b/source/app.d
@@ -0,0 +1,34 @@
+module app;
+
+import cartastraccia.rss;
+import cartastraccia.config;
+import cartastraccia.actor;
+
+import vibe.core.log;
+import pegged.grammar;
+
+import std.stdio;
+import std.file : readText;
+
+static immutable string feedsFile = "feeds.conf";
+
+/**
+ * Entry point: parse the feed list stored in feedsFile with the ConfigFile
+ * grammar and pass the parse tree to processFeeds.
+ *
+ * Throws: Exception if the file content does not match the grammar.
+*/
+void main()
+{
+    import std.exception : enforce;
+
+    // parse feed list
+    auto pt = ConfigFile(readText(feedsFile));
+    // enforce, not assert: asserts are compiled out of release builds, and a
+    // malformed feed list is a user error that must always be reported.
+    enforce(pt.successful, "Invalid "~feedsFile~" file format, check cartastraccia.config for grammar");
+    processFeeds(pt);
+
+    // parse every feed, update if needed
+    //parseRSS(readText("example.xml"));
+}
diff --git a/source/cartastraccia/actor.d b/source/cartastraccia/actor.d
new file mode 100644
index 0000000..a85b88e
--- /dev/null
+++ b/source/cartastraccia/actor.d
@@ -0,0 +1,25 @@
+module cartastraccia.actor;
+
+import std.algorithm : each;
+import std.stdio;
+import vibe.core.log;
+
+import pegged.grammar;
+
+/**
+ * Walk a parse tree two levels deep, visiting every child of every
+ * top-level node.
+ *
+ * The inner loop body is still empty: no work is done per feed yet.
+ *
+ * Params:
+ *     pt = parse tree to walk
+*/
+void processFeeds(ParseTree pt) @trusted
+{
+    foreach(ref conf; pt.children) {
+        foreach(ref feed; conf.children) {
+        }
+    }
+}
+
diff --git a/source/cartastraccia/config.d b/source/cartastraccia/config.d
new file mode 100644
index 0000000..11e06d6
--- /dev/null
+++ b/source/cartastraccia/config.d
@@ -0,0 +1,32 @@
+module cartastraccia.config;
+
+import
pegged.grammar;
+
+mixin(grammar(ConfigFileParser));
+
+/**
+ * Specify grammar for config file in the form:
+ * ...
+ * [feed_name] [refresh_time] [feed_address]
+ * ...
+*/
+immutable string ConfigFileParser = `
+    ConfigFile:
+
+    ConfigFile <- Feed (Newline Feed)*
+
+    Feed <- Name space* Refresh space* Address
+
+    Name <- identifier
+
+    Refresh <- Number Timeunit
+
+    Address <- ~([A-Za-z]+ "://" ( !Newline !">" . )+)
+
+    Number <- ~([0-9]+)
+
+    Timeunit <- [mshd]
+
+    Newline <- endOfLine
+
+`;
diff --git a/source/cartastraccia/rss.d b/source/cartastraccia/rss.d
new file mode 100644
index 0000000..aeb7cc8
--- /dev/null
+++ b/source/cartastraccia/rss.d
@@ -0,0 +1,211 @@
+module cartastraccia.rss;
+
+import vibe.core.log;
+import std.experimental.xml;
+import sumtype;
+
+public:
+
+alias RSS = SumType!(ValidRSS, InvalidRSS);
+alias RSSParent = SumType!(RSS, RSSChannel);
+
+/**
+ * In case an element was found
+ * which does not match the RSS 2.0 specs
+ * see: http://www.rssboard.org/rss-specification
+ */
+struct InvalidRSS {
+    string element;
+    string content;
+}
+
+/**
+ * A valid RSS feed is made of various channels
+*/
+struct ValidRSS {
+    string feedName = "";
+    RSSChannel[string] channels;
+}
+
+/**
+ * Each channel has properties
+ * and various items (actual news)
+*/
+struct RSSChannel {
+    // required elements
+    string title;
+    string link;
+    string description;
+
+    // optional elements
+    string language;
+    string copyright;
+    string managingEditor;
+    string webMaster;
+    string pubDate;
+    string lastBuildDate;
+    string category;
+    string generator;
+    string docs;
+    string cloud;
+    string ttl;
+    string image;
+    string rating;
+    string textInput;
+    string skipHours;
+    string skipDays;
+
+    RSSItem[string] items;
+}
+
+/**
+ * A single news entry of a channel.
+ * NOTE(review): `cathegory` is a misspelling of the RSS `category` element;
+ * the field name is kept as is and insertElement maps `category` onto it.
+*/
+struct RSSItem {
+    // required elements
+    string title;
+    string link;
+    string description;
+
+    // optional elements
+    string author;
+    string cathegory;
+    string comments;
+    string enclosure;
+    string guid;
+    string pubDate;
+    string source;
+}
+
+/**
+ * Parse an RSS document and load its channel into a local RSS value.
+ *
+ * The cursor is entered twice from the start of the document; parsing goes
+ * on only if the node reached is named "channel".
+ *
+ * NOTE(review): the RSS value built here is local and is dropped when the
+ * function returns; nothing is handed back to the caller yet.
+ *
+ * Params:
+ *     feed = XML source handed to the cursor through setSource
+*/
+void parseRSS(R)(R feed) @trusted
+{
+    auto cursor = chooseLexer!string
+        .parser
+        .cursor((CursorError err) { logWarn(err); });
+
+    cursor.setSource(feed);
+
+    RSS rss;
+    cursor.enter();
+    cursor.enter();
+    if(cursor.name == "channel") {
+        if(cursor.enter()) {
+            alias C = typeof(cursor);
+            insertElement!(RSSChannel, RSS, C)(rss, rss, cursor);
+            cursor.next();
+        }
+    }
+}
+
+private:
+
+/**
+ * Insert an element (RSSChannel or RSSItem) which has:
+ * - A parent (the RSS root in case of an RSSChannel,
+ *   or the RSSChannel in case of an RSSItem)
+ * - Various sub-entries which are processed sequentially
+ *   by advancing cursor
+ *
+ * A sub-entry whose name matches a field of ElementType is copied into that
+ * field; an unknown name turns rss into an InvalidRSS. The new element is
+ * stored in parent, keyed by its title, only if rss is still a ValidRSS
+ * once all sub-entries have been visited.
+ *
+ * Params:
+ *     rss    = feed being built; replaced by InvalidRSS on an unknown entry
+ *     parent = container receiving the new element
+ *     cursor = XML cursor, already entered into the element by the caller
+*/
+void insertElement(ElementType, Parent, C)(
+        ref RSS rss, ref Parent parent, ref C cursor) @trusted
+{
+    ElementType newElement;
+
+    string elname;
+
+    // The chain is `static` all the way down so that an unsupported
+    // ElementType is rejected at compile time instead of at run time.
+    static if(is(ElementType == RSSChannel)) {
+        elname = "channel";
+        static assert(is(Parent == RSS));
+    } else static if(is(ElementType == RSSItem)) {
+        elname = "item";
+        static assert(is(Parent == RSSChannel));
+    } else static assert(false, "Invalid ElementType provided");
+
+    while(cursor.kind != XMLKind.elementEnd && cursor.name != elname) {
+        immutable name = cursor.name;
+        if(name == "item") {
+            static if(is(ElementType == RSSChannel)) {
+                logInfo("---> Found item:");
+                cursor.enter();
+                insertElement!(RSSItem, RSSChannel, C)(rss, newElement, cursor);
+                cursor.exit();
+            }
+        } else {
+            cursor.enter();
+            immutable content = cursor.content;
+            cursor.exit();
+
+            // Feed text is passed as an argument, never as the format string:
+            // a '%' inside it must not be read as a format specifier.
+            logInfo("Processing: %s: %s", name, content);
+
+            fill: switch(name) {
+                default:
+                    logWarn("Invalid XML entry detected: %s", name);
+                    rss = InvalidRSS(name, content);
+                    break fill;
+                // One case per struct field, generated at compile time.
+                static if(is(ElementType == RSSChannel)) {
+                    static foreach(m; __traits(allMembers, RSSChannel)) {
+                        static if(m != "items") {
+                            case m:
+                                mixin("newElement."~m~" = content;");
+                                break fill;
+                        }
+                    }
+                } else static if(is(ElementType == RSSItem)) {
+                    // RSSItem spells its field `cathegory`: map the
+                    // `category` element of the RSS 2.0 spec onto it.
+                    case "category":
+                        newElement.cathegory = content;
+                        break fill;
+                    static foreach(m; __traits(allMembers, RSSItem)) {
+                        case m:
+                            mixin("newElement."~m~" = content;");
+                            break fill;
+                    }
+                } else static assert(false, "Invalid ElementType requested");
+            }
+        }
+        cursor.next();
+    }
+
+    rss.match!(
+            (ref InvalidRSS i) {
+                logWarn("Invalid XML entry detected: %s: %s", i.element, i.content);
+            },
+            (ref ValidRSS v) {
+                static if(is(ElementType == RSSChannel))
+                    parent.tryMatch!(
+                            (ref ValidRSS v) => v.channels[newElement.title] = newElement);
+                else static if(is(ElementType == RSSItem))
+                    parent.items[newElement.title] = newElement;
+                logInfo("Inserted %s: %s", elname, newElement.title);
+            });
+}