add filter feature

This commit is contained in:
Denis Lehmann 2020-04-26 19:58:47 +02:00
parent d630cc96c4
commit fa542ee56e
3 changed files with 36 additions and 18 deletions

View file

@ -72,6 +72,10 @@ postprocessor = 'pandoc -f html -t markdown_strict-raw_html --reference-links --
# Fileending for the article files. # Fileending for the article files.
fileending = 'md' fileending = 'md'
# List of regular expression strings. If any of them matches the lowercased article title, the article won't be saved.
# E.g. if you want to skip news about RSS explicitly, add '(\W|^)rss(\W|$)'.
filters = []
# Date and time format as strftime to be included in the articles. # Date and time format as strftime to be included in the articles.
datetime_format = '%d.%m.%Y %H:%M' datetime_format = '%d.%m.%Y %H:%M'
@ -153,6 +157,6 @@ Just synchronize the base_directory with [Syncthing](https://syncthing.net/), [r
## Acknowledgements ## Acknowledgements
Thanks to all the people, who created the nice libraries this project in based on. Thanks to all the people who created the nice software this project is based on.
And also thanks to Dieter Steffmann who created the Canterbury font, which is used for the logo. And also thanks to Dieter Steffmann who created the Canterbury font, which is used for the logo.
You can find it in the `fonts/` directory. You can find it in the `fonts/` directory.

View file

@ -13,6 +13,10 @@ postprocessor = 'pandoc -f html -t markdown_strict-raw_html+pipe_tables --refere
# Fileending for the article files. # Fileending for the article files.
fileending = 'md' fileending = 'md'
# List of regular expression strings. If any of them matches the lowercased article title, the article won't be saved.
# E.g. if you want to skip news about RSS explicitly, add '(\W|^)rss(\W|$)'.
filters = []
# Feeds # Feeds
# The category can be empty (''). The feed will then be stored in the base_directory. # The category can be empty (''). The feed will then be stored in the base_directory.
# The category can also be a path, which will result in subdirectories (e.g. 'technology/hardware'). # The category can also be a path, which will result in subdirectories (e.g. 'technology/hardware').

View file

@ -249,6 +249,15 @@ def update_feed(feed):
if date > threshold_date: if date > threshold_date:
# Check if article should be filtered
filter = False
for f in filters:
if re.search(f, a.title.lower()):
filter = True
log(' filtered article "{}"'.format(a.title))
if not filter:
# Construct filename # Construct filename
filename_prefix = date.strftime('%Y%m%d%H%M') filename_prefix = date.strftime('%Y%m%d%H%M')
filename_postfix = get_filename_postfix(a.title) filename_postfix = get_filename_postfix(a.title)
@ -294,7 +303,7 @@ def remove_old_articles():
# Parse config file # Parse config file
def load_config(filepath): def load_config(filepath):
global base_directory, max_age, datetime_format, postprocessor, fileending, feeds global base_directory, max_age, datetime_format, postprocessor, fileending, filters, feeds
try: try:
config = toml.load(filepath) config = toml.load(filepath)
@ -303,6 +312,7 @@ def load_config(filepath):
datetime_format = config['datetime_format'] datetime_format = config['datetime_format']
postprocessor = config['postprocessor'] postprocessor = config['postprocessor']
fileending = config['fileending'] fileending = config['fileending']
filters = config['filters']
feeds = config['feed'] feeds = config['feed']
except Exception as e: except Exception as e:
error('while parsing config: {}'.format(e)) error('while parsing config: {}'.format(e))