#!/bin/bash
##############################################################################
# Version: 0.5
# find_python_module_requires.sh - Takes one or more files/directories (e.g.,
# 'sources/python-mod/' or 'path/to/foo.py'), extracts the import statements
# from the scripts, each on a new line (standard Python
# 'import ... [as ...]' or 'from ... import ... [as ...]' statements), and
# evaluates them in Python. It then uses Python's report of the file containing
# the module to query RPM for the package that file belongs to, ready
# for copy and paste into a spec file's Requires sections. It will first
# attempt to match the module to installed packages. If this does not work,
# perhaps because the necessary package is not installed, then it will use
# repoquery to find the package based on a standard filename pattern for
# python modules.
#
# It also will print to stderr a list of all modules that it could not
# successfully import. This is not a fool-proof tool, and missing modules
# may in fact be from other packages in disabled repositories, for example.
#
# Also, please note that this outputs every dependency package, which means
# that there will likely be duplicates among them. (For example, it may
# print Requires for each of python, pygtk2, and pygobject2; but only the
# pygtk2 dependency is required, as it will pull in pygobject2 and python
# through its dependency chain.) You'll also need to check versions
# manually. I'm hoping to solve a bunch of these when I learn more about
# the internals of Python modules and whatnot and can perhaps refactor this
# into some callbacks for those.
##############################################################################
# Copyright (C) 2007-2012 Peter Gordon
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor
# Boston, MA 02110-1301, USA.
##############################################################################
# __ ChangeLog __
# 2012-07-10
# * Version 0.5
# - Use repoquery to check for modules if no installed packages match.
# - This will probably be the last version to be Bash-scripted. Hopefully,
# future incarnations will invoke the necessary Python/RPM/Yum queries
# directly through their respective modules, rather than parsing the output
# of chained commands.
# - Add a ...|sort|uniq pipe to the initial grep invocation, to prevent
# searching for multiple copies of the same module.
# 2009-06-01
# * Version 0.4
# - Don't hardcode the Python version.
# - Fix the initial grep regex to catch import calls more precisely.
# 2009-01-24
# * Version 0.3
# - Update string greps for rpm-query error detection.
# 2007-02-08
# * Version 0.2
# - Added support for checking if the queried directories provide a module
# not found in the standard search paths. It will still print any unfound
# module names to stderr.
# 2007-02-07
# * Version 0.1
# - Initial public release
##############################################################################
## Check for required utilities...
## Every external program this script runs must be on PATH; abort at the
## first one that is missing, reporting it on stderr.
for REQUIRED_PROG in python awk xargs rpm grep sed sort uniq perl mktemp dirname repoquery; do
	## 'command -v' is a shell builtin lookup, so the check itself does not
	## depend on an external 'which' helper being installed.
	if ! command -v "${REQUIRED_PROG}" &>/dev/null; then
		echo "Unable to find required program '${REQUIRED_PROG}' in PATH." >&2
		exit 1
	fi
done
##...set some variables...
## Scratch file that collects Python's stderr output for the fallback pass.
TMPFILE=$(mktemp) || {
	echo "Unable to create a temporary file." >&2
	exit 1
}
## Remove the scratch file on every exit path, including early aborts.
trap 'rm -f -- "${TMPFILE}"' EXIT
## This script greps for untranslated rpm/Python messages, so force the C
## locale. LC_ALL is set too because it takes precedence over LANG, and both
## are exported so child processes are guaranteed to see them.
LANG=C
LC_ALL=C
export LANG LC_ALL
## ...Now, this is where the fun begins. :]
## First pass: pull every import statement out of the given files/directories,
##   [from parentModule] import module [as name][,module2 [as name2][,...]]
## run each distinct statement through Python, and hand the path of the module
## file that Python reports to 'rpm -qf' to name the owning package. Whatever
## Python writes to stderr (e.g. failed imports) is appended to ${TMPFILE}.
##
## NOTE(security): every matched source line is executed verbatim by
## 'python -c', so only point this script at code you trust.
##
## The pattern requires whitespace after the 'from'/'import' keywords so that
## identifiers which merely start with "import" are not picked up.
grep --no-filename -P '^\s*(from\s+\S+\s+)?import\s+' -r -- "$@" \
	| sed -e 's/^\s*//' \
	| sort -u \
	| while read -r MOD; do
		## Run queries...
		## Text after the last 'import' keyword; a bare '*' becomes '__init__'.
		MOD_NAME=$(printf '%s\n' "${MOD}" \
			| sed -e 's/^.*import\s//' -e 's/^\*$/__init__/')
		## Parent module of a 'from X import ...' statement (empty otherwise).
		MOD_PARENT=$(printf '%s\n' "${MOD}" \
			| perl -nle 'print $1 if m/^from\s+(\S+)\s+import\s/')
		## Local name bound by a trailing '... as NAME' clause (empty otherwise).
		MOD_ALIAS=$(printf '%s\n' "${MOD_NAME}" \
			| perl -nle 'print $1 if m/\sas (.*?)$/')
		if [[ -n "${MOD_ALIAS}" ]]; then
			MOD_NAME=${MOD_ALIAS}
		fi
		## For 'from X import Y' it is X that has to be installed, so import
		## and inspect the parent module instead of the imported name.
		if [[ -n "${MOD_PARENT}" ]]; then
			MOD="import ${MOD_PARENT}"
			MOD_NAME=${MOD_PARENT}
		fi
		## NOTE(review): the stage between Python and the final filters was
		## unreadable in the reviewed copy of this script. It has been
		## reconstructed from what the surrounding code expects (xargs feeding
		## file paths to 'rpm -qf', with rpm's stderr merged so the grep below
		## can drop the "no arguments" complaint) - confirm against upstream.
		##
		## A printed module looks like <module 'os' from '/path/os.py'>; the
		## perl stage keeps only the path(s). The parenthesised print form is
		## accepted by both Python 2 and Python 3.
		python -c "${MOD}; print(${MOD_NAME})" </dev/null 2>>"${TMPFILE}" \
			| perl -nle "print \$1 while m/ from '([^']+)'/g" \
			| xargs rpm -qf --qf 'Requires:\t%{NAME}\n' 2>&1
	done \
	| grep -v 'rpm: no arguments given for query' \
	| sort -u
## Second pass: deal with the modules Python could not import above. A module
## that can be imported from one of the directories given on the command line
## is provided by the package itself and is skipped. Anything else is looked
## up with repoquery using the standard site-packages file layout.

## Print the names of modules reported as missing, one per line.
## Reads Python's stderr text on stdin. Accepts both the unquoted form
## "ImportError: No module named foo" and the quoted form
## "ModuleNotFoundError: No module named 'foo'".
_missing_modules() {
	awk -v quote="'" '
		/(ImportError|ModuleNotFoundError): No module named / {
			sub(/^.*No module named /, "")
			sub(/[; \t].*$/, "")
			gsub(quote, "")
			print
		}'
}

## Print the site-packages-relative path stem for the dotted module name $1,
## using at most its first two components:
##   "foo" -> "foo",  "foo.bar" -> "foo/bar",  "foo.bar.baz" -> "foo/bar"
_module_path_stem() {
	local mod=$1
	local top=${mod%%.*}
	local rest
	local sub=
	## Only look for a second component when the name really contains a dot;
	## otherwise a plain "foo" would wrongly turn into "foo/foo".
	if [[ "${mod}" == *.* ]]; then
		rest=${mod#*.}
		sub=${rest%%.*}
	fi
	if [[ -n "${sub}" ]]; then
		printf '%s\n' "${top}/${sub}"
	else
		printf '%s\n' "${top}"
	fi
}

_missing_modules < "${TMPFILE}" \
	| sort -u \
	| while read -r MOD; do
		[[ -n "${MOD}" ]] || continue
		FOUND=0
		for ENTRY in "$@"; do
			## Test that the module is not provided by the package itself
			SEARCH_DIR=${ENTRY}
			if [[ -f "${ENTRY}" ]]; then
				SEARCH_DIR=$(dirname -- "${ENTRY}")
			fi
			## Run the import from inside the directory, in a subshell so the
			## caller's working directory is untouched. If the directory cannot
			## be entered the import is not attempted at all.
			if (cd -- "${SEARCH_DIR}" && python -c "import ${MOD}" </dev/null) &>/dev/null; then
				## Import was successful! Don't keep trying other dirs.
				FOUND=1
				break
			fi
		done
		if (( FOUND )); then
			continue
		fi
		## We didn't find it. Use repoquery to find the package owning the
		## file for that Python module, matching the truncated module name
		## ("foo" or "foo/bar") against the standard Python directories.
		MOD_FILE=$(_module_path_stem "${MOD}")
		[[ -n "${MOD_FILE}" ]] || continue
		## The glob characters are passed through to repoquery unexpanded.
		SITE_GLOB="/usr/lib*/python?.?/site-packages/${MOD_FILE}"
		DEP=$(repoquery --qf 'Requires:\t%{NAME}' -f \
			"${SITE_GLOB}.py*" "${SITE_GLOB}/__init__.py*" </dev/null)
		if [[ -z "${DEP}" ]]; then
			printf "Unable to match module '%s'\n\n" "${MOD}" >&2
		else
			printf '%b\n\n' "${DEP}"
		fi
	done
## Clean up
rm -f -- "${TMPFILE}"