diff --git a/README.md b/README.md index b5ee1b7..1dec320 100644 --- a/README.md +++ b/README.md @@ -24,13 +24,38 @@ ## Installation -Requires [Dub](https://github.com/dlang/dub): +This program is compatible with a Unix-like OS, notably GNU/Linux. Other +platforms (OSX, Windows) are not supported and they probably won't ever be. + +### Dependencies + +Carta Straccia uses +[libmrss](https://autistici.org/bakunin/libmrss/doc/index.html) to parse RSS +feeds. It can be installed in the following ways: + +* **Using your package manager**: `libmrss` can be installed from the main + repositories of some distros, using the appropriate package manager. Examples: + - Gentoo/portage: `emerge libmrss` + - Debian/apt and derivatives: `apt install libmrss` + - etc. + + +* **From source**: If `libmrss` is not available for your distribution, +it can be built and installed from source. See: +[https://github.com/bakulf/libmrss](https://github.com/bakulf/libmrss). + +### Building + +Requires a working D compiler and [Dub](https://github.com/dlang/dub): 1. clone this repo: + ``` git clone https://github.com/gallafrancesco/cartastraccia.git ``` + 2. build: + ``` dub build ``` @@ -49,3 +74,12 @@ ## License This project is licensed under the terms of the GPLv3 License. + +## Contributing + +Feel free to open issues and PRs. Current TODOs are: + +* Extensively test the parsing of RSS feeds. +* Work on a comfortable and polished CLI endpoint +* Add endpoints in general (new visualization, curses, improve HTML...) 
+* Documentation and usage examples diff --git a/dub.sdl b/dub.sdl index e41ac72..775c5df 100644 --- a/dub.sdl +++ b/dub.sdl @@ -3,11 +3,12 @@ authors "fra" copyright "Copyright © 2019, fra" license "GPLv3" +mainSourceFile "source/main.d" dependency "dxml" version="~>0.4.1" dependency "htmld" version="~>0.3.7" -dependency "pegged" version=">=0.0.0" -dependency "sumtype" version="~>0.8.13" dependency "requests" version="~>1.0.9" dependency "vibe-d:web" version=">0.8.5" +dependency "sumtype" version="~>0.8.13" +dependency "pegged" version=">=0.0.0" -subConfigurations "requests" "vibed" +libs "mrss" diff --git a/source/app.d b/source/app.d deleted file mode 100644 index d3f476e..0000000 --- a/source/app.d +++ /dev/null @@ -1,170 +0,0 @@ -module app; - -import cartastraccia.config; -import cartastraccia.asciiart; -import cartastraccia.actor; -import cartastraccia.endpoint; - -import vibe.core.log; -import vibe.core.file; -import vibe.http.server; -import vibe.http.router; -import vibe.http.fileserver; -import vibe.inet.url; -import vibe.http.client; -import vibe.web.web; -import vibe.core.core; -import vibe.stream.operations : readAllUTF8; -import vibe.core.concurrency; -import pegged.grammar; -import sumtype; -import requests; - -import std.exception; -import std.stdio; -import std.file : readText; -import std.algorithm : each; -import std.datetime : SysTime; -import std.getopt; -import std.conv : to; -import std.process; - -immutable string info = " -============================================= -| Carta Straccia is a RSS feed aggregator | -============================================= -0. Write a feeds.conf file [feed_name refresh_timeout feed_url] -> echo \"Stallman 3h https://stallman.org/rss/rss.xml\" > feeds.conf ---------------------------------------------- -1. Start the daemon: -> cartastraccia --daemon --endpoint=cli --endpoint=html --feeds=feeds.conf ---------------------------------------------- -2. 
Connect to daemon using HTML endpoint -> cartastraccia --browser=/path/to/browser ----------------------------------------------"; - -void runWebServer(ref URLRouter router, immutable string bindAddress, immutable ushort bindPort) -{ - auto settings = new HTTPServerSettings; - settings.port = bindPort; - settings.bindAddresses = ["127.0.0.1", bindAddress]; - - listenHTTP(settings, router); - runEventLoop(); -} - -void runDaemon(immutable string feedsFile, immutable - string bindAddress, immutable ushort bindPort) -{ - // parse feed list - auto pt = ConfigFile(readText(feedsFile)); - enforce(pt.successful, "Invalid "~feedsFile~" file format, check cartastraccia.config for grammar"); - auto feeds = processFeeds(pt); - TaskMap tasks; - - feeds.match!( - (InvalidFeeds i) { - logWarn("Invalid feeds processed. Exiting."); - return; - }, - (RSSFeed[] fl) { - - // n. threads == n. feeds - setupWorkerThreads(fl.length.to!uint); - - // start tasks in charge of updating feeds - feeds.match!( - (InvalidFeeds i) => logFatal(i.msg), - (RSSFeed[] fl) { - fl.each!( - (RSSFeed feed) { - logInfo("Starting task: "~feed.name); - // start workers to serve RSS data - tasks[feed.name] = runWorkerTaskH( - &feedActor, feed.name, feed.path, 0); - }); - }); - - // initialize a new service to serve requests - auto router = new URLRouter; - router.registerWebInterface(new EndpointService(feeds, tasks)); - router.get("*", serveStaticFiles("public/")); - - // start the webserver in main thread - runWebServer(router, bindAddress, bindPort); - }); -} - -void runClient(EndpointType endpoint, immutable string browser, immutable string - bindAddress, immutable ushort bindPort, immutable bool reloadFeeds) -{ - - if(reloadFeeds) { - try { - string url = "http://"~bindAddress~":"~bindPort.to!string~"/reload"; - auto req = Request(); - req.keepAlive = false; - req.timeout = ACTOR_REQ_TIMEOUT; - req.get(url); - - } catch (Exception e) { - logWarn("ERROR from daemon: "~e.msg~"\nCannot reload feeds file."); 
- } - } - - if(endpoint == EndpointType.cli) { - try { - string url = "http://"~bindAddress~":"~bindPort.to!string~"/cli"; - auto req = Request(); - req.keepAlive = false; - req.timeout = ACTOR_REQ_TIMEOUT; - req.get(url); - - } catch (Exception e) { - logWarn("ERROR from daemon: "~e.msg~"\nCheck daemon logs for details (is it running?)"); - } - - } else if(endpoint == EndpointType.html) { - - if(!existsFile(browser)) { - logWarn("Could not find browser: "~browser); - logWarn("Try running: cartastraccia --browser=[/path/to/browser]"); - return; - } - - immutable address = "http://"~bindAddress~":"~bindPort.to!string; - auto pid = spawnShell(browser ~" "~address); - wait(pid); - } -} - -void main(string[] args) -{ - // CLI arguments - bool daemon = false; - EndpointType endpoint = EndpointType.html; - string feedsFile = "feeds.conf"; - string bindAddress = "localhost"; - ushort bindPort = 8080; - string browser = "/usr/bin/elinks"; - bool reloadFeeds = false; - - auto helpInformation = getopt( - args, - "daemon|d", "Start daemon", &daemon, - "endpoint|e", "Endpoints to register [cli]", &endpoint, - "feeds|f", "File containing feeds to pull [feeds.conf]", &feedsFile, - "host|l", "Bind to this address [localhost]", &bindAddress, - "port|p", "Bind to this port [8080]", &bindPort, - "browser|b", "Absolute path to browser for HTML rendering [/usr/bin/elinks]", &browser, - "reload|r", "Reload feeds file", &reloadFeeds - ); - - if(helpInformation.helpWanted) { - defaultGetoptPrinter(info, helpInformation.options); - return; - } - - if(daemon) runDaemon(feedsFile, bindAddress, bindPort); - else runClient(endpoint, browser, bindAddress, bindPort, reloadFeeds); -} diff --git a/source/cartastraccia/actor.d b/source/cartastraccia/actor.d index c7f565e..757ad71 100644 --- a/source/cartastraccia/actor.d +++ b/source/cartastraccia/actor.d @@ -1,3 +1,26 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * This file is part of cartastraccia. 
+ * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see https://www.gnu.org/licenses/. + * --- + * + * Feed actor logic and inter-actor communication primitives. + * +*/ + module cartastraccia.actor; import cartastraccia.rss; @@ -20,6 +43,8 @@ import core.time; import std.conv : to; import std.variant; +import std.string : assumeUTF; +import std.utf : validate; alias TaskMap = Task[string]; @@ -42,7 +67,9 @@ req.keepAlive = false; req.timeout = ACTOR_REQ_TIMEOUT; auto res = req.get(path); - parseRSS(rss, cast(immutable string)res.responseBody.data); + string tmp = res.responseBody.data.assumeUTF; + validate(tmp); + parseRSS(rss, tmp); } catch (Exception e) { @@ -56,7 +83,7 @@ rss.match!( (ref InvalidRSS i) { logWarn("Invalid feed at: "~path); - logWarn("Caused by entry \""~i.element~"\": "~i.content); + logWarn("Caused by: \""~i.element~"\": "~i.content); }, (ref FailedRSS f) { logWarn("Failed to load feed: "~ feedName); diff --git a/source/cartastraccia/asciiart.d b/source/cartastraccia/asciiart.d index 73ffe6d..e7fb2e0 100644 --- a/source/cartastraccia/asciiart.d +++ b/source/cartastraccia/asciiart.d @@ -1,3 +1,29 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * This file is part of cartastraccia. 
+ * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see https://www.gnu.org/licenses/. + * --- + * + * Ascii art for the title bar of Cartastraccia. + * --- + * Credits to http://www.patorjk.com/software/taag/ + * for the incredibly useful and enjoyable tool. + * +*/ + module cartastraccia.asciiart; static immutable string asciiArt = r" diff --git a/source/cartastraccia/config.d b/source/cartastraccia/config.d index 9527570..59e492f 100644 --- a/source/cartastraccia/config.d +++ b/source/cartastraccia/config.d @@ -1,3 +1,26 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * This file is part of cartastraccia. + * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see https://www.gnu.org/licenses/. + * --- + * + * Config (feeds) file parsing. 
+ * +*/ + module cartastraccia.config; import pegged.grammar; diff --git a/source/cartastraccia/endpoint.d b/source/cartastraccia/endpoint.d index c3c5ab5..9533de8 100644 --- a/source/cartastraccia/endpoint.d +++ b/source/cartastraccia/endpoint.d @@ -1,3 +1,26 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * This file is part of cartastraccia. + * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see https://www.gnu.org/licenses/. + * --- + * + * Pluggable endpoint interface using Vibe.d services. + * +*/ + module cartastraccia.endpoint; import cartastraccia.config; diff --git a/source/cartastraccia/include/mrss.d b/source/cartastraccia/include/mrss.d new file mode 100644 index 0000000..44887ac --- /dev/null +++ b/source/cartastraccia/include/mrss.d @@ -0,0 +1,916 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * This file is part of cartastraccia. + * + * --- + * This is a direct translation of the mrss.h header file; + * its only purpose is to act as an interface from its original C source to + * the D programming language. + * All credit for the original source goes to: + * bakunin - Andrea Marchesini + * https://www.autistici.org/bakunin/libmrss/ + * --- + * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see https://www.gnu.org/licenses/. + * --- + * + * D interface file for libmrss. + * +*/ + + +module cartastraccia.include.mrss; + +import std.string : fromStringz; +import std.conv : to; + +import core.stdc.time; +/** Converts a NUL-terminated C string into a D string; a null pointer yields null. */ +string ZtoString(const char* c) nothrow +{ + if (c !is null) + return to!string(fromStringz(c)); + else + return null; +} +/** Returns a pointer to a mutable, NUL-terminated copy of s; len receives the length of that copy, terminator included. */ +auto toZString(string s, ref size_t len) nothrow +{ + char[] ret = s.to!(char[]); + if (ret.length == 0 || ret[$-1] != '\0') /* length check first: an empty string has no last element to inspect */ + ret ~= "\0".to!(char[]); + len = ret.length; + return ret.ptr; +} + +extern(C) nothrow +{ + +alias mrss_generic_t = void*; + +/** This enum describes the error type of libmrss */ +immutable enum mrss_error_t { + MRSS_OK = 0, /**< No error */ + MRSS_ERR_POSIX, /**< For the correct error, use errno */ + MRSS_ERR_PARSER, /**< Parser error */ + MRSS_ERR_DOWNLOAD, /**< Download error */ + MRSS_ERR_VERSION, /**< The RSS has an incompatible VERSION */ + MRSS_ERR_DATA /**< The parameters are incorrect */ +} + +immutable enum mrss_version_t { + MRSS_VERSION_0_91, /**< 0.91 RSS version */ + MRSS_VERSION_0_92, /**< 0.92 RSS version */ + MRSS_VERSION_1_0, /**< 1.0 RSS version */ + MRSS_VERSION_2_0, /**< 2.0 RSS version */ + MRSS_VERSION_ATOM_0_3, /**< 0.3 Atom version */ + MRSS_VERSION_ATOM_1_0 /**< 1.0 Atom version */ +} + +/** Flag list for mrss_set and mrss_get functions */ +immutable enum mrss_flag_t { + /* Generic */ + + /** Set the version to a mrss_t element - the value is a mrss_version_t enum */ + MRSS_FLAG_VERSION = 1, + + /** Set the title to a mrss_t element - the value is a string */ + MRSS_FLAG_TITLE, + /** Set the title type to a mrss_t element - the value is a string (ex: text, html, ...)*/ 
+ MRSS_FLAG_TITLE_TYPE, + /** Set the description to a mrss_t element - the value is a string */ + MRSS_FLAG_DESCRIPTION, + /** Set the description type to a mrss_t element - the value is a string */ + MRSS_FLAG_DESCRIPTION_TYPE, + /** Set the link to a mrss_t element - the value is a string */ + MRSS_FLAG_LINK, + /** Set the id to a mrss_t element - the value is a string */ + MRSS_FLAG_ID, + /** Set the language to a mrss_t element - the value is a string */ + MRSS_FLAG_LANGUAGE, + /** Set the rating to a mrss_t element - the value is a string */ + MRSS_FLAG_RATING, + /** Set the copyright to a mrss_t element - the value is a string */ + MRSS_FLAG_COPYRIGHT, + /** Set the copyright type to a mrss_t element - the value is a string */ + MRSS_FLAG_COPYRIGHT_TYPE, + /** Set the pubDate to a mrss_t element - the value is a string */ + MRSS_FLAG_PUBDATE, + /** Set the lastBuildDate to a mrss_t element - the value is a string */ + MRSS_FLAG_LASTBUILDDATE, + /** Set the docs to a mrss_t element - the value is a string */ + MRSS_FLAG_DOCS, + /** Set the managingeditor to a mrss_t element - the value is a string */ + MRSS_FLAG_MANAGINGEDITOR, + /** Set the managingeditor's email to a mrss_t element - the value is a string */ + MRSS_FLAG_MANAGINGEDITOR_EMAIL, + /** Set the managingeditor's uri to a mrss_t element - the value is a string */ + MRSS_FLAG_MANAGINGEDITOR_URI, + /** Set the webMaster to a mrss_t element - the value is a string */ + MRSS_FLAG_WEBMASTER, + /** Set the ttl to a mrss_t element - the value is an integer */ + MRSS_FLAG_TTL, + /** Set the about to a mrss_t element - the value is a string */ + MRSS_FLAG_ABOUT, + + /* Contributor */ + + /** Set the contributor to a mrss_t element - the value is a string */ + MRSS_FLAG_CONTRIBUTOR, + /** Set the contributor's email to a mrss_t element - the value is a string */ + MRSS_FLAG_CONTRIBUTOR_EMAIL, + /** Set the contributor's uri to a mrss_t element - the value is a string */ + MRSS_FLAG_CONTRIBUTOR_URI, + + 
/* Generator */ + + /** Set the generator to a mrss_t element - the value is a string */ + MRSS_FLAG_GENERATOR, + /** Set the generator's uri to a mrss_t element - the value is a string */ + MRSS_FLAG_GENERATOR_URI, + /** Set the generator's version to a mrss_t element - the value is a string */ + MRSS_FLAG_GENERATOR_VERSION, + + /* Image */ + + /** Set the image_title to a mrss_t element - the value is a string */ + MRSS_FLAG_IMAGE_TITLE, + /** Set the image_url to a mrss_t element - the value is a string */ + MRSS_FLAG_IMAGE_URL, + /** Set the image_logo to a mrss_t element - the value is a string */ + MRSS_FLAG_IMAGE_LOGO, + /** Set the image_link to a mrss_t element - the value is a string */ + MRSS_FLAG_IMAGE_LINK, + /** Set the image_width to a mrss_t element - the value is an integer */ + MRSS_FLAG_IMAGE_WIDTH, + /** Set the image_height to a mrss_t element - the value is an integer */ + MRSS_FLAG_IMAGE_HEIGHT, + /** Set the image_description to a mrss_t element - the value is a string */ + MRSS_FLAG_IMAGE_DESCRIPTION, + + /* TextInput */ + + /** Set the textinput_title to a mrss_t element - the value is a string */ + MRSS_FLAG_TEXTINPUT_TITLE, + /** Set the textinput_description to a mrss_t element - the value is a string */ + MRSS_FLAG_TEXTINPUT_DESCRIPTION, + /** Set the textinput_name to a mrss_t element - the value is a string */ + MRSS_FLAG_TEXTINPUT_NAME, + /** Set the textinput_link to a mrss_t element - the value is a string */ + MRSS_FLAG_TEXTINPUT_LINK, + + /* Cloud */ + + /** Set the cloud to a mrss_t element - the value is a string */ + MRSS_FLAG_CLOUD, + /** Set the cloud_domain to a mrss_t element - the value is a string */ + MRSS_FLAG_CLOUD_DOMAIN, + /** Set the cloud_port to a mrss_t element - the value is an integer */ + MRSS_FLAG_CLOUD_PORT, + /** Set the cloud_path to a mrss_t element - the value is a string */ + MRSS_FLAG_CLOUD_PATH, + /** Set the cloud_registerProcedure to a mrss_t element - + * the value is a string */ + 
MRSS_FLAG_CLOUD_REGISTERPROCEDURE, + /** Set the cloud_protocol to a mrss_t element - the value is a string */ + MRSS_FLAG_CLOUD_PROTOCOL, + + /* SkipHours */ + + /** Set the hour to a mrss_hour_t element - the value is a string */ + MRSS_FLAG_HOUR, + + /* SkipDays */ + + /** Set the day to a mrss_day_t element - the value is a string */ + MRSS_FLAG_DAY, + + /* Category or Item/Category */ + + /** Set the category to a mrss_category_t element - the value is a string */ + MRSS_FLAG_CATEGORY, + /** Set the domain to a mrss_category_t element - the value is a string */ + MRSS_FLAG_CATEGORY_DOMAIN, + /** Set the label to a mrss_category_t element - the value is a string */ + MRSS_FLAG_CATEGORY_LABEL, + + /* Item */ + + /** Set the title to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_TITLE, + /** Set the title type to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_TITLE_TYPE, + /** Set the link to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_LINK, + /** Set the description to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_DESCRIPTION, + /** Set the description type to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_DESCRIPTION_TYPE, + /** Set the copyright to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_COPYRIGHT, + /** Set the copyright type to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_COPYRIGHT_TYPE, + + /** Set the author to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_AUTHOR, + /** Set the author's uri to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_AUTHOR_URI, + /** Set the author's email to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_AUTHOR_EMAIL, + + /** Set the contributor to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_CONTRIBUTOR, + /** Set the contributor's uri to a mrss_item_t element - the value is a string */ + 
MRSS_FLAG_ITEM_CONTRIBUTOR_URI, + /** Set the contributor's email to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_CONTRIBUTOR_EMAIL, + + /** Set the comments to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_COMMENTS, + /** Set the pubDate to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_PUBDATE, + /** Set the guid to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_GUID, + /** Set the guid_isPermaLink to a mrss_item_t element - + * the value is an integer */ + MRSS_FLAG_ITEM_GUID_ISPERMALINK, + /** Set the source to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_SOURCE, + /** Set the source_url to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_SOURCE_URL, + /** Set the enclosure to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_ENCLOSURE, + /** Set the enclosure_url to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_ENCLOSURE_URL, + /** Set the enclosure_length to a mrss_item_t element - + * the value is an integer */ + MRSS_FLAG_ITEM_ENCLOSURE_LENGTH, + /** Set the enclosure_type to a mrss_item_t element - the value is a string */ + MRSS_FLAG_ITEM_ENCLOSURE_TYPE, + + /* Tag and attribute */ + + /** Set the name to a mrss_tag_t element - the value is a string */ + MRSS_FLAG_TAG_NAME, + + /** Set the value to a mrss_tag_t element - the value is a string */ + MRSS_FLAG_TAG_VALUE, + + /** Set the namespace to a mrss_tag_t element - the value is a string */ + MRSS_FLAG_TAG_NS, + + /** Set the name to a mrss_attribute_t element - the value is a string */ + MRSS_FLAG_ATTRIBUTE_NAME, + + /** Set the value to a mrss_attribute_t element - the value is a string */ + MRSS_FLAG_ATTRIBUTE_VALUE, + + /** Set the namespace to a mrss_attribute_t element - the value is a string */ + MRSS_FLAG_ATTRIBUTE_NS, + + /** Set the terminator flag */ + MRSS_FLAG_END = 0 + +} + +/** Enum for the casting of the libmrss data struct */ +immutable enum 
mrss_element_t { + /** The data struct is a mrss_t */ + MRSS_ELEMENT_CHANNEL, + /** The data struct is a mrss_item_t */ + MRSS_ELEMENT_ITEM, + /** The data struct is a mrss_hour_t */ + MRSS_ELEMENT_SKIPHOURS, + /** The data struct is a mrss_day_t */ + MRSS_ELEMENT_SKIPDAYS, + /** The data struct is a mrss_category_t */ + MRSS_ELEMENT_CATEGORY, + /** The data struct is a mrss_tag_t */ + MRSS_ELEMENT_TAG, + /** The data struct is a mrss_attribute_t */ + MRSS_ELEMENT_ATTRIBUTE +} + +/** Data struct for any items of RSS. It contains a pointer to the list + * of categories. + * + * \brief + * Struct data for item elements */ +struct mrss_item_t { + + /** For internal use only: */ + mrss_element_t element; + int allocated; + + /* Data: */ + + /* 0.91 0.92 1.0 2.0 ATOM */ + char *title; /* R O O O R */ + char *title_type; /* - - - - O */ + char *link; /* R O O O O */ + char *description; /* R O - O O */ + char *description_type; /* - - - - 0 */ + char *copyright; /* - - - - O */ + char *copyright_type; /* - - - - O */ + + char *author; /* - - - O O */ + char *author_uri; /* - - - - O */ + char *author_email; /* - - - - O */ + + char *contributor; /* - - - - O */ + char *contributor_uri; /* - - - - O */ + char *contributor_email; /* - - - - O */ + + char *comments; /* - - - O - */ + char *pubDate; /* - - - O O */ + char *guid; /* - - - O O */ + int guid_isPermaLink; /* - - - O - */ + + char *source; /* - O - O - */ + char *source_url; /* - R - R - */ + + char *enclosure; /* - O - O - */ + char *enclosure_url; /* - R - R - */ + int enclosure_length; /* - R - R - */ + char *enclosure_type; /* - R - R - */ + + mrss_category_t *category; /* - O - O O */ + mrss_tag_t *other_tags; + mrss_item_t *next; +}; + +/** Data struct for skipHours elements. 
+ * + * \brief + * Struct data for skipHours elements */ +struct mrss_hour_t { + /** For internal use only: */ + mrss_element_t element; + int allocated; + + /* Data: */ + /* 0.91 0.92 1.0 2.0 ATOM */ + char *hour; /* R R - R - */ + mrss_hour_t *next; +}; + +/** Data struct for skipDays elements. + * + * \brief + * Struct data for skipDays elements */ +struct mrss_day_t { + /** For internal use only: */ + mrss_element_t element; + int allocated; + + /* Data: */ + /* 0.91 0.92 1.0 2.0 ATOM */ + char *day; /* R R - R - */ + mrss_day_t *next; +}; + +/** Data struct for category elements + * + * \brief + * Struct data for category elements */ +struct mrss_category_t { + /** For internal use only: */ + mrss_element_t element; + int allocated; + + /* Data: */ + /* 0.91 0.92 1.0 2.0 ATOM */ + char *category; /* - R - R R */ + char *domain; /* - O - O O */ + char *label; /* - - - - O */ + mrss_category_t *next; +}; + +/** Principal data struct. It contains pointers to any other structures. + * + * \brief + * Principal data struct. 
It contains pointers to any other structures */ +struct mrss_t { + /** For internal use only: */ + mrss_element_t element; + int allocated; + int curl_error; + + /* Data: */ + + char *file; + size_t size; + char *encoding; + + mrss_version_t _version; /* 0.91 0.92 1.0 2.0 ATOM */ + + char *title; /* R R R R R */ + char *title_type; /* - - - - O */ + char *description; /* R R R R R */ + char *description_type; /* - - - - O */ + char *link; /* R R R R O */ + char *id; /* - - - - O */ + char *language; /* R O - O O */ + char *rating; /* O O - O - */ + char *copyright; /* O O - O O */ + char *copyright_type; /* - - - - O */ + char *pubDate; /* O O - O - */ + char *lastBuildDate; /* O O - O O */ + char *docs; /* O O - O - */ + char *managingeditor; /* O O - O O */ + char *managingeditor_email; /* O O - O O */ + char *managingeditor_uri; /* O O - O O */ + char *webMaster; /* O O - O - */ + int ttl; /* - - - O - */ + char *about; /* - - R - - */ + + /* Contributor */ /* - - - - O */ + char *contributor; /* - - - - R */ + char *contributor_email; /* - - - - O */ + char *contributor_uri; /* - - - - O */ + + /* Generator */ + char *generator; /* - - - O O */ + char *generator_uri; /* - - - - O */ + char *generator_version; /* - - - - O */ + + /* Tag Image: */ /* O O O O - */ + char *image_title; /* R R R R - */ + char *image_url; /* R R R R O */ + char *image_logo; /* - - - - O */ + char *image_link; /* R R R R - */ + uint image_width; /* O O - O - */ + uint image_height; /* O O - O - */ + char *image_description; /* O O - O - */ + + /* TextInput: */ /* O O O O - */ + char *textinput_title; /* R R R R - */ + char *textinput_description; /* R R R R - */ + char *textinput_name; /* R R R R - */ + char *textinput_link; /* R R R R - */ + + /* Cloud */ + char *cloud; /* - O - O - */ + char *cloud_domain; /* - R - R - */ + int cloud_port; /* - R - R - */ + char *cloud_path; /* - R - R - */ + char *cloud_registerProcedure;/* - R - R - */ + char *cloud_protocol; /* - R - R - */ + + 
mrss_hour_t *skipHours; /* O O - O - */ + mrss_day_t *skipDays; /* O O - O - */ + + mrss_category_t *category; /* - O - O O */ + + mrss_item_t *item; /* R R R R R */ + + mrss_tag_t *other_tags; + +//#ifdef USE_LOCALE + //void *c_locale; +//#endif + +}; + +/** Data struct for any other tag out of the RSS namespace. + * + * \brief + * Struct data for external tags */ +struct mrss_tag_t { + /** For internal use only: */ + mrss_element_t element; + int allocated; + + /*name of the tag */ + char *name; + + /* value */ + char *value; + + /* namespace */ + char *ns; + + /* list of attributes: */ + mrss_attribute_t *attributes; + + /* Sub tags: */ + mrss_tag_t *children; + + /* the next tag: */ + mrss_tag_t *next; +}; + +/** Data struct for the attributes of the tag + * + * \brief + * Struct data for external attribute */ +struct mrss_attribute_t { + /** For internal use only: */ + mrss_element_t element; + int allocated; + + /* name of the tag */ + char *name; + + /* value */ + char *value; + + /* namespace */ + char *ns; + + /* The next attribute: */ + mrss_attribute_t *next; +}; + +/** Options data struct. It contains some user preferences. + * + * \brief + * Options data struct. It contains some user preferences. */ +struct mrss_options_t { + int timeout; + char *proxy; + char *proxy_authentication; + char *certfile; + char *cacert; + char *password; + int verifypeer; + char *authentication; + char *user_agent; +}; + +/** PARSE FUNCTIONS *********************************************************/ + +/** + * Parses a url and creates the data struct of the feed RSS url. + * This function downloads your request if this is http or ftp. + * \param url The url to be parsed + * \param mrss the pointer to your data struct + * \return the error code + */ +mrss_error_t mrss_parse_url (char * url, + mrss_t ** mrss); + +/** + * Like the previous function but with a options struct. 
+ * \param url The url to be parsed + * \param mrss the pointer to your data struct + * \param options a pointer to a options data struct + * \return the error code + */ +mrss_error_t mrss_parse_url_with_options + (char * url, + mrss_t ** mrss, + mrss_options_t * options); + +/** + * Like the previous function but with CURLcode error + * \param url The url to be parsed + * \param mrss the pointer to your data struct + * \param options a pointer to a options data struct. It can be NULL + * \param curlcode the error code from libcurl + * \return the error code + */ +//mrss_error_t mrss_parse_url_with_options_and_error + //(char * url, + //mrss_t ** mrss, + //mrss_options_t * options, + //CURLcode * curlcode); + +/** + * Like the previous function but you take ownership of the downloaded buffer + * in case of success + * \param url The url to be parsed + * \param mrss the pointer to your data struct + * \param options a pointer to a options data struct + * \param curlcode the error code from libcurl + * \param feed_content a pointer to the buffer with the document. 
This is not NULL terminated + * \param feed_size the size of the buffer above + * \return the error code + */ +//mrss_error_t mrss_parse_url_with_options_error_and_transfer_buffer + //(char * url, + //mrss_t ** mrss, + //mrss_options_t * options, + //CURLcode * curlcode, + //char ** feed_content, + //int * feed_size); + +/** + * Parses a file and creates the data struct of the feed RSS url + * \param file The file to be parsed + * \param mrss the pointer to your data struct + * \return the error code + */ +mrss_error_t mrss_parse_file (char * file, + mrss_t ** mrss); + +/** + * Parses a buffer and creates the data struct of the feed RSS url + * \param buffer Pointer to the xml memory stream to be parsed + * \param size_buffer The size of the array of char + * \param mrss the pointer to your data struct + * \return the error code + */ +mrss_error_t mrss_parse_buffer (char * buffer, + size_t size_buffer, + mrss_t ** mrss); + +/** WRITE FUNCTIONS *********************************************************/ + +/** + * Writes a RSS struct data in a local file + * \param mrss the rss struct data + * \param file the local file + * \return the error code + */ +mrss_error_t mrss_write_file (mrss_t * mrss, + char * file); + +/** + * Write a RSS struct data in a buffer. + * + * \code + * char *buffer; + * buffer=NULL; //<--- This is important!! + * mrss_write_buffer (mrss, &buffer); + * \endcode + * + * The buffer must be NULL. + * \param mrss the rss struct data + * \param buffer the buffer + * \return the error code + */ +mrss_error_t mrss_write_buffer (mrss_t * mrss, + char ** buffer); + +/** FREE FUNCTION ***********************************************************/ + +/** + * This function frees any type of data struct of libmrss. If the element + * is alloced by libmrss, it will be freed, else this function frees + * only the internal data. 
+ * + * \code + * mrss_t *t=....; + * mrss_item_t *item=...; + * + * mrss_free(t); + * mrss_free(item); + * \endcode + * + * \param element the data struct + * \return the error code + */ +mrss_error_t mrss_free (mrss_generic_t element); + +/** GENERIC FUNCTION ********************************************************/ + +/** + * This function returns a static string with the description of error code + * \param err the error code that you need as string + * \return a string. Don't free this string! + */ +char * mrss_strerror (mrss_error_t err); + +/** + * This function returns a static string with the description of curl code + * \param err the error code that you need as string + * \return a string. Don't free this string! + */ +//char * mrss_curl_strerror (CURLcode err); + +/** + * This function returns the mrss_element_t of a mrss data struct. + * \param element it is the element that you want to check + * \param ret it is a pointer to a mrss_element_t. It will be set. + * \return the error code + */ +mrss_error_t mrss_element (mrss_generic_t element, + mrss_element_t *ret); + +/** + * This function returns the number of seconds since January 1st 1970 in the + * UTC time zone, for the url that the urlstring parameter specifies. + * + * \param urlstring the url + * \param lastmodified is a pointer to a time_t struct. The return value can + * be 0 if the HEAD request does not return a Last-Modified value. + * \return the error code + */ +mrss_error_t mrss_get_last_modified (char * urlstring, + time_t * lastmodified); + +/** + * Like the previous function but with an options struct. + * + * \param urlstring the url + * \param lastmodified is a pointer to a time_t struct. The return value can + * be 0 if the HEAD request does not return a Last-Modified value.
+ * \param options a pointer to a options struct + * \return the error code + */ +mrss_error_t mrss_get_last_modified_with_options + (char * urlstring, + time_t * lastmodified, + mrss_options_t * options); +/** + * Like the previous function but with a CURLcode pointer. + * + * \param urlstring the url + * \param lastmodified is a pointer to a time_t struct. The return value can + * be 0 if the HEAD request does not return a Last-Modified value. + * \param options a pointer to a options struct + * \param curl_code it will contain the error code of libcurl + * \return the error code + */ +//mrss_error_t mrss_get_last_modified_with_options_and_error + //(char * urlstring, + //time_t * lastmodified, + //mrss_options_t * options, + //CURLcode * curl_code); + +/** EDIT FUNCTIONS **********************************************************/ + +/** If you want create a new feed RSS from scratch, you need use + * this function as the first. + * + * \code + * mrss_t *d; + * mrss_error_t err; + * char *string; + * int integer; + * + * d=NULL; // ->this is important! If d!=NULL, mrss_new doesn't alloc memory. 
+ * mrss_new(&d); + * + * err=mrss_set (d, + * MRSS_FLAG_VERSION, MRSS_VERSION_0_92, + * MRSS_FLAG_TITLE, "the title!", + * MRSS_FLAG_TTL, 12, + * MRSS_FLAG_END); + * + * if(err!=MRSS_OK) printf("%s\n",mrss_strerror(err)); + * + * err=mrss_get (d, + * MRSS_FLAG_TITLE, &string, + * MRSS_FLAG_TTL, &integer, + * MRSS_FLAG_END); + * + * if(err!=MRSS_OK) printf("%s\n",mrss_strerror(err)); + * printf("The title is: '%s'\n", string); + * printf("The ttl is: '%d'\n", integer); + * free(string); + * \endcode + * + * \param mrss is the pointer to the new data struct + * \return the error code + */ +mrss_error_t mrss_new (mrss_t ** mrss); + +/** + * To insert/replace/remove a flag, use this function as in this example: + * \code + * mrss_set(mrss, MRSS_FLAG_TITLE, "hello world", MRSS_FLAG_END); + * mrss_set(item, MRSS_FLAG_DESCRIPTION, NULL, MRSS_FLAG_END); + * \endcode + * + * \param element it is the mrss data that you want to change, followed by a + * list of elements. The list is composed of KEY - VALUE pairs, with + * MRSS_FLAG_END as the last element. The type of each value depends on its key. + * \see mrss_flag_t + * \return the error code + */ +mrss_error_t mrss_set (mrss_generic_t element, + ...); + +/** + * This function returns the requested arguments. The syntax is the same as + * mrss_set but the values of the list are pointers to data elements (int *, + * char **). If the key needs a char **, the value will be allocated. + * \code + * mrss_get(category, MRSS_FLAG_CATEGORY_DOMAIN, &string, MRSS_FLAG_END); + * if(string) free(string); + * \endcode + * \param element it is any type of mrss data struct. + * \return the error code + */ +mrss_error_t mrss_get (mrss_generic_t element, + ...); + +/** + * This function adds an element to another element. For example you can + * add an item to a channel, or a category to an item, and so on.
Look at this + * example: + * \code + * mrss_item_t *item = NULL; + * mrss_hour_t *hour = NULL; + * mrss_day_t day; // If the element is not null, the function + * mrss_category_t category, // does not alloc it + * + * mrss_new_subdata(mrss, MRSS_ELEMENT_ITEM, &item); + * mrss_new_subdata(mrss, MRSS_ELEMENT_SKIPHOURS, &hour); + * mrss_new_subdata(mrss, MRSS_ELEMENT_SKIPDAYS, &day); + * mrss_new_subdata(item, MRSS_ELEMENT_ITEM_CATEGORY, &category); + * \endcode + * \param element it is the parent element + * \param subelement it is the type of the child (MRSS_ELEMENT_ITEM, + * MRSS_ELEMENT_CATEGORY, ...) + * \param subdata it is the pointer to the new struct. If *subdata + * is already set, it will not be allocated; otherwise it will be. + * \return the error code + * \see mrss_element_t + */ +mrss_error_t mrss_new_subdata (mrss_generic_t element, + mrss_element_t subelement, + mrss_generic_t subdata); + +/** + * This function removes a subdata element. As first argument you must specify + * the parent, and as second argument the child. + * \code + * mrss_remove_subdata(mrss, item); + * \endcode + * \param element it is the parent + * \param subdata the child that you want to remove. Remember: + * mrss_remove_subdata does not free the memory. So you can remove an item + * and reinsert it afterwards. + * \return the error code + */ +mrss_error_t mrss_remove_subdata (mrss_generic_t element, + mrss_generic_t subdata); + +/* TAGS FUNCTIONS **********************************************************/ + +/** + * This function searches for a tag in a mrss_t, a mrss_item_t or a mrss_tag_t by + * name and namespace. + * \param element it is the parent node (mrss_t or mrss_item_t) + * \param name the name of the element + * \param ns the namespace.
It can be null if the tag has a null namespace + * \param tag the return pointer + * \return the error code + */ +mrss_error_t mrss_search_tag (mrss_generic_t element, + char * name, + char * ns, + mrss_tag_t ** tag); + +/** + * This function searches for an attribute in a mrss_tag_t by name and namespace + * \param element it is the mrss_tag_t + * \param name the name of the element + * \param ns the namespace. It can be null if the tag has a null namespace + * \param attribute the return pointer + * \return the error code + */ +mrss_error_t mrss_search_attribute (mrss_generic_t element, + char * name, + char * ns, + mrss_attribute_t ** attribute); + +/* OPTIONS FUNCTIONS *******************************************************/ + +/** + * This function creates an options struct. + * + * \param timeout timeout for the download procedure + * \param proxy a proxy server. can be NULL + * \param proxy_authentication a proxy authentication (user:pwd). can be NULL + * \param certfile a certificate for SSL authentication + * \param password the password of certfile + * \param cacert CA certificate to verify peer against. can be NULL + * \param verifypeer enable/disable the peer check + * \param authentication an authentication login (user:pwd). can be NULL + * \param user_agent a user_agent. can be NULL + * \return a pointer to a newly allocated mrss_options_t struct + */ +mrss_options_t * + mrss_options_new (int timeout, + char *proxy, + char *proxy_authentication, + char *certfile, + char *password, + char *cacert, + int verifypeer, + char *authentication, + char *user_agent); + +/** + * This function destroys an options struct.
+ * \param options a pointer to a options struct + */ +void mrss_options_free (mrss_options_t *options); + +} diff --git a/source/cartastraccia/renderer.d b/source/cartastraccia/renderer.d index 91cf99f..a093884 100644 --- a/source/cartastraccia/renderer.d +++ b/source/cartastraccia/renderer.d @@ -1,3 +1,27 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * This file is part of cartastraccia. + * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see . + * --- + * + * Feeds visualization rendering (HTML only, for now). + * +*/ + + module cartastraccia.renderer; import cartastraccia.rss; diff --git a/source/cartastraccia/rss.d b/source/cartastraccia/rss.d index 14e52d6..1b19ba3 100644 --- a/source/cartastraccia/rss.d +++ b/source/cartastraccia/rss.d @@ -1,22 +1,44 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * This file is part of cartastraccia. + * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see . + * --- + * + * RSS data structures, types and parsing. + * +*/ module cartastraccia.rss; import cartastraccia.actor : FeedActorRequest; +import cartastraccia.include.mrss; import vibe.core.log; import vibe.http.server : render; -import dxml.parser; import sumtype; -import std.algorithm : startsWith, sort; +import std.algorithm : startsWith, sort, move; import std.datetime; import std.range; import std.conv : to; +import std.string; public: alias RSS = SumType!(ValidRSS, InvalidRSS, FailedRSS); - /** * In case the RSS feed couldn't be loaded */ @@ -59,7 +81,6 @@ // optional elements string language; string copyright; - string managingEditor; string webMaster; string pubDate; string lastBuildDate; @@ -125,151 +146,67 @@ } /** - * Entrypoint for parsing a rss feed (repsesented as string) + * Entry point for parsing a rss feed (represented as string) + * Parsing done using libmrss (see cartastraccia.include.mrss) */ -void parseRSS(ref RSS rss, immutable string feed) @trusted +void parseRSS(ref RSS rss, string feed) @trusted { - auto rssRange = parseXML!simpleXML(feed); + mrss_t* rssData; + size_t len; + auto fz = feed.toZString(len); + mrss_error_t err = mrss_parse_buffer(fz, len, &rssData); - if(rssRange.front.name == "html") { - rss = InvalidRSS("html", ""); - return; + if(err) { + rss = InvalidRSS("mrss", err.to!string); + + } else { + rss = ValidRSS(); + + rss.tryMatch!( + (ref ValidRSS vrss) { + newChannel(rssData, vrss); + + mrss_item_t* item = rssData.item; + while(item) { + newItem(item, vrss); + item = item.next; + } + }); } - while(rssRange.front.type != EntityType.text && rssRange.front.name != "channel") { - rssRange.popFront(); - } - rssRange.popFront(); - - alias C = typeof(rssRange); - insertElement!(RSSChannel, RSS, C)(rss, rss, rssRange); - // parse date and sort in descending order (newest first) rss.tryMatch!( (ref InvalidRSS i) { - 
logWarn("Invalid RSS for feed: " ~ feed); return; }, (ref ValidRSS vr) { - vr.channel.items.sort!( (i,j) => - (parseRFC822DateTime(i.pubDate) - > parseRFC822DateTime(j.pubDate))); + vr.channel.items.sort!( (i,j) { + return (parseRFC822DateTime(i.pubDate) + > parseRFC822DateTime(j.pubDate)); + }); }); } - private: -/** - * Insert an element (RSSChannel or RSSItem) which has: - * - A parent (be it the RSS xml root (RSSChannel - * or the RSSChannel in case of an RSSItem - * - Various sub-entries which are processed sequentially - * by advancing rssRange -*/ -void insertElement(ElementType, Parent, C)( - ref RSS rss, ref Parent parent, ref C rssRange) @trusted +void newChannel(mrss_t* rssData, ref ValidRSS rss) { - ElementType newElement; - - mixin(selectElementName); - - // advance the parser to completion, entry by entry - while(rssRange.front.type != EntityType.elementEnd - && rssRange.front.type != EntityType.text - && rssRange.front.name != elname) { - - immutable name = rssRange.front.name; - rssRange.popFront(); - - if(name == "item") { - - // recursively insert items - static if(is(ElementType == RSSChannel)) { - insertElement!(RSSItem, RSSChannel, C)(rss, newElement, rssRange); - } else { - rss = InvalidRSS(name, ""); - } - - } else if(name == "image" || name == "media:content") { - // skip images - while(rssRange.front.type != EntityType.elementEnd - && rssRange.front.name != name) { - rssRange.popFront(); // elementStart - rssRange.popFront(); // text - rssRange.popFront(); // elementEnd - } - - } else if(rssRange.front.type == EntityType.text - || rssRange.front.type == EntityType.cdata) { - - // found a valid text field - immutable content = rssRange.front.text; - rssRange.popFront(); - - fill: switch(name) { - - default: - // we don't care about entries which are not attributes of RSSChannel - logDebug("Ignoring XML Entity: " ~ name); - break fill; - - // inserting a channel - static if(is(ElementType == RSSChannel)) { - static foreach(m; 
__traits(allMembers, RSSChannel)) { - static if(m != "items") { - case m: - mixin("newElement."~m~" = content;"); - break fill; - } - } - - // inserting an item - } else if(is(ElementType == RSSItem)) { - static foreach(m; __traits(allMembers, RSSItem)) { - case m: - mixin("newElement."~m~" = content;"); - break fill; - } - - // should not get here (means function invocation was invalid) - } else assert(false, "Invalid ElementType requested"); - } + static foreach(m; __traits(allMembers, RSSChannel)) { + static if(is(typeof(__traits(getMember, mrss_t, m)) == char*)) { + mixin("rss.channel."~m~" = rssData."~m~".ZtoString.idup;"); } - // skip elementEnd - rssRange.popFront(); } - - // finished channel / item parsing. Insert it into rss struct - rss.match!( - (ref InvalidRSS i) { - logWarn("Invalid XML Entity detected: " - ~ i.element - ~ ": " - ~ i.content); - }, - (ref FailedRSS f) {}, - (ref ValidRSS v) { - static if(is(ElementType == RSSChannel)) - parent.tryMatch!( - (ref ValidRSS v) { - v.channel = newElement; - }); - else if(is(ElementType == RSSItem)) - parent.items ~= newElement; - logInfo("Inserted " ~ elname ~ ": " ~ newElement.title); - }); } -static immutable string selectElementName = " - string elname; +void newItem(mrss_item_t* rssItem, ref ValidRSS rss) +{ + RSSItem newItem; - static if(is(ElementType == RSSChannel)) { - elname = \"channel\"; - static assert(is(Parent == RSS)); - } else if(is(ElementType == RSSItem)) { - elname = \"item\"; - static assert(is(Parent == RSSChannel)); - } else assert(false, \"Invalid ElementType provided\"); -"; + static foreach(m; __traits(allMembers, RSSItem)) { + static if(is(typeof(__traits(getMember, mrss_item_t, m)) == char*)) { + mixin("newItem."~m~" = rssItem."~m~".to!string;"); + } + } + rss.channel.items ~= newItem; +} diff --git a/source/main.d b/source/main.d new file mode 100644 index 0000000..afbee86 --- /dev/null +++ b/source/main.d @@ -0,0 +1,193 @@ +/** + * Copyright (c) 2019 Francesco Galla` - + * + * 
This file is part of cartastraccia. + * + * cartastraccia is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * cartastraccia is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with cartastraccia. If not, see . + * --- + * + * Main program launcher. + * +*/ + +module main; + +import cartastraccia.config; +import cartastraccia.asciiart; +import cartastraccia.actor; +import cartastraccia.endpoint; + +import vibe.core.log; +import vibe.core.file; +import vibe.http.server; +import vibe.http.router; +import vibe.http.fileserver; +import vibe.inet.url; +import vibe.http.client; +import vibe.web.web; +import vibe.core.core; +import vibe.stream.operations : readAllUTF8; +import vibe.core.concurrency; +import pegged.grammar; +import sumtype; +import requests; + +import std.exception; +import std.stdio; +import std.file : readText; +import std.algorithm : each; +import std.datetime : SysTime; +import std.getopt; +import std.conv : to; +import std.process; + +immutable string info = " +============================================= +| Carta Straccia is a RSS feed aggregator | +============================================= +0. Write a feeds.conf file [feed_name refresh_timeout feed_url] +> echo \"Stallman 3h https://stallman.org/rss/rss.xml\" > feeds.conf +--------------------------------------------- +1. Start the daemon: +> cartastraccia --daemon --endpoint=cli --endpoint=html --feeds=feeds.conf +--------------------------------------------- +2. 
Connect to daemon using HTML endpoint +> cartastraccia --browser=/path/to/browser +---------------------------------------------"; + +void runWebServer(ref URLRouter router, immutable string bindAddress, immutable ushort bindPort) +{ + auto settings = new HTTPServerSettings; + settings.port = bindPort; + settings.bindAddresses = ["127.0.0.1", bindAddress]; + + listenHTTP(settings, router); + runEventLoop(); +} + +void runDaemon(immutable string feedsFile, immutable + string bindAddress, immutable ushort bindPort) +{ + // parse feed list + auto pt = ConfigFile(readText(feedsFile)); + enforce(pt.successful, "Invalid "~feedsFile~" file format, check cartastraccia.config for grammar"); + auto feeds = processFeeds(pt); + TaskMap tasks; + + feeds.match!( + (InvalidFeeds i) { + logWarn("Invalid feeds processed. Exiting."); + return; + }, + (RSSFeed[] fl) { + + // n. threads == n. feeds + setupWorkerThreads(fl.length.to!uint); + + // start tasks in charge of updating feeds + feeds.match!( + (InvalidFeeds i) => logFatal(i.msg), + (RSSFeed[] fl) { + fl.each!( + (RSSFeed feed) { + logInfo("Starting task: "~feed.name); + // start workers to serve RSS data + tasks[feed.name] = runWorkerTaskH( + &feedActor, feed.name, feed.path, 0); + }); + }); + + // initialize a new service to serve requests + auto router = new URLRouter; + router.registerWebInterface(new EndpointService(feeds, tasks)); + router.get("*", serveStaticFiles("public/")); + + // start the webserver in main thread + runWebServer(router, bindAddress, bindPort); + }); +} + +void runClient(EndpointType endpoint, immutable string browser, immutable string + bindAddress, immutable ushort bindPort, immutable bool reloadFeeds) +{ + + if(reloadFeeds) { + try { + string url = "http://"~bindAddress~":"~bindPort.to!string~"/reload"; + auto req = Request(); + req.keepAlive = false; + req.timeout = ACTOR_REQ_TIMEOUT; + req.get(url); + + } catch (Exception e) { + logWarn("ERROR from daemon: "~e.msg~"\nCannot reload feeds file."); 
+ } + } + + if(endpoint == EndpointType.cli) { + try { + string url = "http://"~bindAddress~":"~bindPort.to!string~"/cli"; + auto req = Request(); + req.keepAlive = false; + req.timeout = ACTOR_REQ_TIMEOUT; + req.get(url); + + } catch (Exception e) { + logWarn("ERROR from daemon: "~e.msg~"\nCheck daemon logs for details (is it running?)"); + } + + } else if(endpoint == EndpointType.html) { + + if(!existsFile(browser)) { + logWarn("Could not find browser: "~browser); + logWarn("Try running: cartastraccia --browser=[/path/to/browser]"); + return; + } + + immutable address = "http://"~bindAddress~":"~bindPort.to!string; + auto pid = spawnShell(browser ~" "~address); + wait(pid); + } +} + +void main(string[] args) +{ + // CLI arguments + bool daemon = false; + EndpointType endpoint = EndpointType.html; + string feedsFile = "feeds.conf"; + string bindAddress = "localhost"; + ushort bindPort = 8080; + string browser = "/usr/bin/elinks"; + bool reloadFeeds = false; + + auto helpInformation = getopt( + args, + "daemon|d", "Start daemon", &daemon, + "endpoint|e", "Endpoints to register [cli]", &endpoint, + "feeds|f", "File containing feeds to pull [feeds.conf]", &feedsFile, + "host|l", "Bind to this address [localhost]", &bindAddress, + "port|p", "Bind to this port [8080]", &bindPort, + "browser|b", "Absolute path to browser for HTML rendering [/usr/bin/elinks]", &browser, + "reload|r", "Reload feeds file", &reloadFeeds + ); + + if(helpInformation.helpWanted) { + defaultGetoptPrinter(info, helpInformation.options); + return; + } + + if(daemon) runDaemon(feedsFile, bindAddress, bindPort); + else runClient(endpoint, browser, bindAddress, bindPort, reloadFeeds); +}