add filter feature
This commit is contained in:
parent
d630cc96c4
commit
fa542ee56e
3 changed files with 36 additions and 18 deletions
|
|
@ -72,6 +72,10 @@ postprocessor = 'pandoc -f html -t markdown_strict-raw_html --reference-links --
|
||||||
# Fileending for the article files.
|
# Fileending for the article files.
|
||||||
fileending = 'md'
|
fileending = 'md'
|
||||||
|
|
||||||
|
# List of regular expression strings. If any of these matches a lowercased article title, the article won't be saved.
|
||||||
|
# E.g. if you want to skip news about RSS explicitly, add '(\W|^)rss(\W|$)'.
|
||||||
|
filters = []
|
||||||
|
|
||||||
# Date and time format as strftime to be included in the articles.
|
# Date and time format as strftime to be included in the articles.
|
||||||
datetime_format = '%d.%m.%Y %H:%M'
|
datetime_format = '%d.%m.%Y %H:%M'
|
||||||
|
|
||||||
|
|
@ -153,6 +157,6 @@ Just synchronize the base_directory with [Syncthing](https://syncthing.net/), [r
|
||||||
|
|
||||||
## Acknowledgements
|
## Acknowledgements
|
||||||
|
|
||||||
Thanks to all the people, who created the nice libraries this project in based on.
|
Thanks to all the people who created the nice software this project is based on.
|
||||||
And also thanks to Dieter Steffmann who created the Canterbury font, which is used for the logo.
|
And also thanks to Dieter Steffmann who created the Canterbury font, which is used for the logo.
|
||||||
You can find it in the `fonts/` directory.
|
You can find it in the `fonts/` directory.
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,10 @@ postprocessor = 'pandoc -f html -t markdown_strict-raw_html+pipe_tables --refere
|
||||||
# Fileending for the article files.
|
# Fileending for the article files.
|
||||||
fileending = 'md'
|
fileending = 'md'
|
||||||
|
|
||||||
|
# List of regular expression strings. If any of these matches a lowercased article title, the article won't be saved.
|
||||||
|
# E.g. if you want to skip news about RSS explicitly, add '(\W|^)rss(\W|$)'.
|
||||||
|
filters = []
|
||||||
|
|
||||||
# Feeds
|
# Feeds
|
||||||
# The category can be empty (''). The feed will then be stored in the base_directory.
|
# The category can be empty (''). The feed will then be stored in the base_directory.
|
||||||
# The category can also be a path, which will result in subdirectories (e.g. 'technology/hardware').
|
# The category can also be a path, which will result in subdirectories (e.g. 'technology/hardware').
|
||||||
|
|
|
||||||
12
spiderss.py
12
spiderss.py
|
|
@ -249,6 +249,15 @@ def update_feed(feed):
|
||||||
|
|
||||||
if date > threshold_date:
|
if date > threshold_date:
|
||||||
|
|
||||||
|
# Check if article should be filtered
|
||||||
|
filter = False
|
||||||
|
for f in filters:
|
||||||
|
if re.search(f, a.title.lower()):
|
||||||
|
filter = True
|
||||||
|
log(' filtered article "{}"'.format(a.title))
|
||||||
|
|
||||||
|
if not filter:
|
||||||
|
|
||||||
# Construct filename
|
# Construct filename
|
||||||
filename_prefix = date.strftime('%Y%m%d%H%M')
|
filename_prefix = date.strftime('%Y%m%d%H%M')
|
||||||
filename_postfix = get_filename_postfix(a.title)
|
filename_postfix = get_filename_postfix(a.title)
|
||||||
|
|
@ -294,7 +303,7 @@ def remove_old_articles():
|
||||||
# Parse config file
|
# Parse config file
|
||||||
def load_config(filepath):
|
def load_config(filepath):
|
||||||
|
|
||||||
global base_directory, max_age, datetime_format, postprocessor, fileending, feeds
|
global base_directory, max_age, datetime_format, postprocessor, fileending, filters, feeds
|
||||||
|
|
||||||
try:
|
try:
|
||||||
config = toml.load(filepath)
|
config = toml.load(filepath)
|
||||||
|
|
@ -303,6 +312,7 @@ def load_config(filepath):
|
||||||
datetime_format = config['datetime_format']
|
datetime_format = config['datetime_format']
|
||||||
postprocessor = config['postprocessor']
|
postprocessor = config['postprocessor']
|
||||||
fileending = config['fileending']
|
fileending = config['fileending']
|
||||||
|
filters = config['filters']
|
||||||
feeds = config['feed']
|
feeds = config['feed']
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error('while parsing config: {}'.format(e))
|
error('while parsing config: {}'.format(e))
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue