Home Find all files in a directory with extension .txt in Python
Reply: 29

Find all files in a directory with extension .txt in Python

usertest
1#
usertest Published in 2010-10-19 01:09:13Z

How can I find all the files in a directory having the extension .txt in python?

Ev. Kounis
2#
Ev. Kounis Reply to 2017-02-23 10:16:20Z

You can use glob:

import glob
import os

# Make /mydir the working directory so the relative pattern below is
# resolved inside it; the printed names are therefore relative too.
os.chdir("/mydir")
for txt_name in glob.glob("*.txt"):
    print(txt_name)

or simply os.listdir:

import os

# Non-recursive: inspect only the direct entries of /mydir.
for entry in os.listdir("/mydir"):
    if not entry.endswith(".txt"):
        continue
    print(os.path.join("/mydir", entry))

or, if you want to traverse the directory tree recursively, use os.walk:

import os

# Recursive: os.walk yields one (root, dirs, files) triple per directory.
for root, dirs, files in os.walk("/mydir"):
    for txt_name in (name for name in files if name.endswith(".txt")):
        print(os.path.join(root, txt_name))
Muhammad Alkarouri
3#
Muhammad Alkarouri Reply to 2010-10-19 01:11:34Z

Use glob.

>>> import glob
>>> glob.glob('./*.txt')
['./outline.txt', './pip-log.txt', './test.txt', './testingvim.txt']
MSeifert
4#
MSeifert Reply to 2017-04-28 00:04:15Z

Something like that should do the job

# Walk `directory` recursively and print the bare name of every .txt file.
# `os` must be imported and `directory` defined by the surrounding code.
for root, dirs, files in os.walk(directory):
    for file in files:
        if file.endswith('.txt'):
            # print() call form works on Python 2 and Python 3 alike.
            print(file)
Seth
5#
Seth Reply to 2010-10-19 01:13:02Z

Something like this will work:

>>> import os
>>> path = '/usr/share/cups/charmaps'
>>> text_files = [f for f in os.listdir(path) if f.endswith('.txt')]
>>> text_files
['euc-cn.txt', 'euc-jp.txt', 'euc-kr.txt', 'euc-tw.txt', ... 'windows-950.txt']
hughdbrown
6#
hughdbrown Reply to 2010-10-19 01:27:09Z

I like os.walk():

import os
import os.path

# Folder to search. The original passed the builtin function `dir` to
# os.walk, which is not a path; use an explicit, non-shadowing name.
directory = "/mydir"

for root, dirs, files in os.walk(directory):
    for f in files:
        fullpath = os.path.join(root, f)
        # splitext compares the real extension, so "notes.txt.bak" is skipped.
        if os.path.splitext(fullpath)[1] == '.txt':
            print(fullpath)

Or with generators:

import os
import os.path

# Folder to search (explicit name instead of the builtin `dir`).
directory = "/mydir"

# Lazily produce the full path of every file below `directory`.
fileiter = (os.path.join(root, f)
            for root, _, files in os.walk(directory)
            for f in files)
# Keep only the paths whose extension is exactly ".txt".
txtfileiter = (f for f in fileiter if os.path.splitext(f)[1] == '.txt')
for txt in txtfileiter:
    print(txt)
jfs
7#
jfs Reply to 2010-10-19 18:51:53Z

Here are more versions of the same that produce slightly different results:

glob.iglob()

import glob

# iglob returns an iterator instead of a list. The "*/" component restricts
# the search to the immediate subdirectories of /mydir.
for f in glob.iglob("/mydir/*/*.txt"):
    print(f)

glob.glob1()

# glob1(literal_directory, basename_pattern) returns bare file names.
# NOTE(review): glob1 is an undocumented helper and is deprecated in recent
# Python releases; glob.glob("*.tx?", root_dir="/mydir") is the documented
# replacement on Python 3.10+ -- confirm against the target version.
print(glob.glob1("/mydir", "*.tx?"))

fnmatch.filter()

import fnmatch
import os

# Unlike glob, fnmatch.filter also matches names that start with a dot.
print(fnmatch.filter(os.listdir("/mydir"), "*.tx?"))
mrgloom
8#
mrgloom Reply to 2012-08-17 06:27:45Z
import os
import sys

# Usage: script.py <directory> <suffix>
# Both arguments are required. The original test (`== 2`) rejected the
# one-argument case only and crashed with IndexError when none were given.
if len(sys.argv) != 3:
    print('no params')
    sys.exit(1)

directory = sys.argv[1]  # renamed from `dir` to stop shadowing the builtin
mask = sys.argv[2]

files = os.listdir(directory)

# A list (not a lazy filter object) so that printing shows the names on
# Python 3 as well as Python 2.
res = [name for name in files if name.endswith(mask)]

print(res)
Anuvrat Parashar
9#
Anuvrat Parashar Reply to 2012-12-22 08:47:24Z

path.py is another alternative: https://github.com/jaraco/path.py

# NOTE(review): recent releases of the `path` package export `Path`
# (capitalised); confirm which name the installed version provides.
from path import path

p = path('/path/to/the/directory')
# files(pattern=...) lists the files directly inside `p` that match.
for f in p.files(pattern='*.txt'):
    print(f)
praba230890
10#
praba230890 Reply to 2013-11-19 21:59:47Z

This code makes my life simpler.

import os

# Folder to search (the original handed the builtin `dir` to os.walk).
directory = "/mydir"

# Bare names of every .txt file below `directory`. str.endswith also accepts
# a tuple, e.g. ('.txt', '.png', '.pdf'), to match several extensions.
fnames = [
    file
    for root, dirs, files in os.walk(directory)
    for file in files
    if file.endswith('.txt')
]
for fname in fnames:
    print(fname)
Brad Koch
11#
Brad Koch Reply to 2014-02-18 20:30:27Z

You can try this code:

import glob
import os

# Raw string: in a normal literal "\." and "\D" are invalid escape sequences.
os.chdir(r"D:\...\DirName")

# Map a running index to each matching file name.
filename_arr = dict(enumerate(glob.glob("*.txt")))

for key, value in filename_arr.items():
    print(key, value)
x01saa
12#
x01saa Reply to 2014-04-06 12:09:50Z

You can try this code

import glob
import os

# Glob once and derive both lists from the same result.
_txt_paths = glob.glob('your/files/dir/*.txt')
filenames_with_extension = [os.path.basename(p) for p in _txt_paths]
# splitext strips only the final extension, so "report.v2.txt" becomes
# "report.v2" (split('.')[0] would have truncated it to "report").
filenames_without_extension = [
    os.path.splitext(name)[0] for name in filenames_with_extension
]
CodeTarsier
13#
CodeTarsier Reply to 2014-07-09 11:10:18Z
import glob
import os

data_dir = 'data_folder/'
file_dir_extension = os.path.join(data_dir, '*.txt')

# The pattern already restricts results to *.txt, so no extra suffix check
# is needed inside the loop.
for file_name in glob.glob(file_dir_extension):
    print(file_name)

For me. It's classic.

duskwuff
14#
duskwuff Reply to 2014-07-17 20:24:54Z
import os

path = 'mypath/path'
# Every entry (files and directories) directly inside `path`.
files = os.listdir(path)

# Keep only the entries whose name ends in ".txt".
files_txt = [entry for entry in files if entry.endswith('.txt')]
Adam Chrapkowski
15#
Adam Chrapkowski Reply to 2014-10-25 23:46:49Z

Functional solution with sub-directories:

from fnmatch import filter
from functools import partial
from itertools import chain
from os import path, walk

# One lazy sequence of full paths per visited directory. `filter` here is
# fnmatch.filter (names, pattern), not the builtin.
matches_per_dir = (
    map(partial(path.join, root), filter(filenames, "*.txt"))
    for root, _, filenames in walk("mydir")
)
# Flatten the per-directory sequences and print all paths on one line.
print(*chain.from_iterable(matches_per_dir))
yucer
16#
yucer Reply to 2016-04-19 08:35:31Z

Use fnmatch: https://docs.python.org/2/library/fnmatch.html

import fnmatch
import os

# Check each entry of the current directory against a shell-style pattern.
for file in os.listdir('.'):
    if fnmatch.fnmatch(file, '*.txt'):
        print(file)
Wasi Ahmad
17#
Wasi Ahmad Reply to 2017-03-19 17:54:38Z

Try this; it will find all matching files inside a folder and its sub-folders:

import glob
import os

# Directory to search; substitute your own. Backslashes in a normal string
# literal must be doubled ("\\"), a single one starts an escape sequence.
os.chdir("H:\\wallpaper")

# "**" combined with recursive=True descends into every sub-folder.
# Replace "*.psd" with the extension you are looking for.
for match in glob.glob("**/*.psd", recursive=True):
    print(match)
Rajiv Sharma
18#
Rajiv Sharma Reply to 2016-07-26 08:25:16Z

use Python OS module to find files with specific extension.

the simple example is here :

import os

# This is the path where you want to search.
# NOTE(review): on Windows a bare 'd:' is drive-relative (the current
# directory on drive D), so the drive root is spelled with a trailing
# separator here -- confirm this is the intended starting point.
path = 'd:\\'

# This is the extension you want to detect (.jpg, .png, .xls, .log, ...).
extension = '.txt'

for root, dirs_list, files_list in os.walk(path):
    for file_name in files_list:
        if os.path.splitext(file_name)[-1] == extension:
            file_name_path = os.path.join(root, file_name)
            print(file_name)
            print(file_name_path)   # full path of the matching file
Xxxo
19#
Xxxo Reply to 2016-08-30 06:51:17Z

Python has all tools to do this:

import os

the_dir = 'the_dir_that_want_to_search_in'
# A real list on both Python 2 and 3; filter() on Python 3 would return a
# lazy iterator that can be consumed only once.
all_txt_files = [name for name in os.listdir(the_dir) if name.endswith('.txt')]
tashuhka
20#
tashuhka Reply to 2016-12-06 15:10:36Z

In case the folder contains a lot of files or memory is a constraint, consider using generators:

def yield_files_with_extensions(folder_path, file_extension):
    """Lazily yield the bare names of matching files under ``folder_path``.

    The tree is traversed recursively with ``os.walk``. Only the file name
    is yielded, not the directory it was found in.

    :param folder_path: directory at which the traversal starts
    :param file_extension: suffix (or tuple of suffixes) a name must end with
    """
    for _root, _subdirs, names in os.walk(folder_path):
        yield from (name for name in names if name.endswith(file_extension))

Option A: Iterate

# Consume the generator one name at a time; nothing is held in memory.
for txt_name in yield_files_with_extensions('.', '.txt'):
    print(txt_name)

Option B: Get all

# Materialise every yielded name into a list in one go.
files = list(yield_files_with_extensions('.', '.txt'))
Nicolaesse
21#
Nicolaesse Reply to 2017-02-08 20:46:11Z

I suggest using fnmatch together with the upper() string method. This way you can match any of the following:

  1. Name.txt;
  2. Name.TXT;
  3. Name.Txt

.

import fnmatch
import os

# Upper-casing the name before matching makes the test case-insensitive,
# so Name.txt, Name.TXT and Name.Txt are all reported.
for file in os.listdir("/Users/Johnny/Desktop/MyTXTfolder"):
    if fnmatch.fnmatch(file.upper(), '*.TXT'):
        print(file)
banoth ravinder
22#
banoth ravinder Reply to 2017-03-05 19:58:38Z
import glob
import os

# The relative patterns below are resolved against the current working
# directory, which is what `path` records.
path = os.getcwd()

extensions = ('*.py', '*.cpp')

for pattern in extensions:
    for files in glob.glob(pattern):
        print(files)
Kamen Tsvetkov
23#
Kamen Tsvetkov Reply to 2017-04-27 23:35:17Z

To get an array of ".txt" file names from a folder called "data" in the same directory I usually use this simple line of code:

import os

# Names (not paths) of the entries in ./data that end in ".txt".
fileNames = [entry for entry in os.listdir("data") if entry.endswith(".txt")]
MSeifert
24#
MSeifert Reply to 2017-08-23 10:24:09Z

You can simply use pathlib's glob [1]:

import pathlib

# Path.glob returns a generator of Path objects; list() materialises it.
directory = pathlib.Path('your_directory')
list(directory.glob('*.txt'))

or in a loop:

for txt_file in pathlib.Path('your_directory').glob('*.txt'):
    # do something with "txt_file" -- a loop body needs at least one
    # statement, so print the path as a placeholder.
    print(txt_file)

If you want it recursive you can use .glob('**/*.txt')


[1] The pathlib module was added to the standard library in Python 3.4. You can install back-ports of that module on older Python versions (e.g. using conda or pip): pathlib and pathlib2.

Martin Thoma
25#
Martin Thoma Reply to 2017-08-04 07:41:40Z

A copy-pastable solution similar to the one of ghostdog:

def get_all_filepaths(root_path, ext):
    """
    Search all files which have a given extension within root_path.

    This ignores the case of the extension and searches subdirectories, too.
    Both the file name and ``ext`` are lower-cased before comparison, so
    ``'.txt'`` and ``'.TXT'`` select the same files.

    Parameters
    ----------
    root_path : str
        Directory at which the recursive search starts.
    ext : str
        Suffix to look for, e.g. ``'.txt'``.

    Returns
    -------
    list of str
        Paths of the matching files, each prefixed with the directory
        in which it was found.

    Examples
    --------
    >>> get_all_filepaths('/run', '.lock')
    ['/run/unattended-upgrades.lock',
     '/run/mlocate.daily.lock',
     '/run/xtables.lock',
     '/run/mysqld/mysqld.sock.lock',
     '/run/postgresql/.s.PGSQL.5432.lock',
     '/run/network/.ifstate.lock',
     '/run/lock/asound.state.lock']
    """
    import os

    # Normalise once; the original lower-cased only the file name, so an
    # upper-case `ext` could never match.
    wanted = ext.lower()
    return [
        os.path.join(root, filename)
        for root, _dirs, files in os.walk(root_path)
        for filename in files
        if filename.lower().endswith(wanted)
    ]
kfsone
26#
kfsone Reply to 2017-09-17 06:57:33Z

Many users have replied with os.walk answers, which return not only the files in the given directory but also the files in all of its subdirectories.

import os


def files_in_dir(path, extension=''):
    """
    Generator: yield the names of the files directly inside ``path`` that
    end with ``extension``. Subdirectories are not searched.

    :param path: absolute or relative path of the directory to inspect
    :param extension: optional suffix; only names ending with it are
        yielded (the default ``''`` matches every file)
    :yields: bare file names, without the directory part
    """
    for _, dirs, files in os.walk(path):
        # Emptying `dirs` in place tells os.walk not to descend further.
        dirs[:] = []
        yield from (f for f in files if f.endswith(extension))

# Example: print all the .py files in './python'.
# The second argument is a plain suffix compared with str.endswith, not a
# glob pattern, so it must be '.py' rather than '*.py'.
for filename in files_in_dir('./python', '.py'):
    print("-", filename)

Or for a one off where you don't need a generator:

path, ext = "./python", ".py"

# Fallback so the loop below still works when `path` does not exist and
# os.walk therefore yields nothing.
matches = ()
for _, _, dirfiles in os.walk(path):
    matches = (f for f in dirfiles if f.endswith(ext))
    break  # only the top-level directory is wanted

for filename in matches:
    print("-", filename)

If you are going to use matches for something else, you may want to make it a list rather than a generator expression:

    matches = [f for f in dirfiles if f.endswith(ext)]
Efreeto
27#
Efreeto Reply to 2017-10-19 23:34:10Z

Here's one with extend()

types = ('*.jpg', '*.png')
# Collect the matches for every pattern, in pattern order, in one flat list.
# `glob`, `os` and `path` are expected to be defined by the surrounding code.
images_list = [
    found
    for pattern in types
    for found in glob.glob(os.path.join(path, pattern))
]
Giovanni Gianni
28#
Giovanni Gianni Reply to 2017-11-05 16:25:06Z
import os

# os.listdir() without an argument lists the current working directory.
[entry for entry in os.listdir() if entry.endswith(".txt")]

HOW MANY FILES IN DIR AND SUBDIRS ?

If you want to know how many files there are in a dir and subdirs:

In this example, we count the files contained in the directory and all of its subdirectories.

import os    

def count(dir, counter=0):
    """Return a summary of how many files exist in ``dir`` and its subdirs.

    :param dir: directory at which the recursive count starts
    :param counter: initial value the file count is added to
    :return: a string of the form ``"<dir> : <total> files"``
    """
    for _root, _subdirs, files in os.walk(dir):
        counter += len(files)
    # Space before "files" so the text reads "12057 files", not "12057files".
    return dir + " : " + str(counter) + " files"


print(count("F:\\python"))

output

F:\python : 12057 files

BoRRis
29#
BoRRis Reply to 2017-12-10 07:57:12Z

A simple method by using for loop :

import os

# Trailing characters a name must end with to be reported.
# NOTE(review): the original compares against "exe" although its comment
# mentions .txt; the matched suffix is kept as-is -- change it to ".txt"
# if that is what is wanted.
suffix = "exe"

p = os.listdir('E:')  # path

for name in p:
    # endswith handles names shorter than the suffix, where indexing
    # name[-3] would raise IndexError.
    if name.endswith(suffix):
        print(name)
    else:
        print("nops")

This can be generalised further, though.

user136036
30#
user136036 Reply to 2018-01-19 03:57:57Z

I did a test (Python 3.6.4, W7x64) to see which solution is the fastest for one folder, no subdirectories, to get a list of complete file paths for files with a specific extension.

To make it short, for this task os.listdir() is the fastest and is 1.7x as fast as the next best: os.walk() (with a break!), 2.7x as fast as pathlib, 3.2x faster than os.scandir() and 3.3x faster than glob.
Please keep in mind, that those results will change when you need recursive results.

import os
import pathlib
import timeit
import glob

def a():
    # Benchmark candidate: pathlib glob over the current working directory,
    # converting each result to str. The list is built and then discarded.
    path = pathlib.Path().cwd()
    list_sqlite_files = [str(f) for f in path.glob("*.sqlite")]

def b(): 
    # Benchmark candidate: os.scandir, keeping entries whose extension (via
    # os.path.splitext on the entry itself) is ".sqlite"; collects entry.path.
    path = os.getcwd()
    list_sqlite_files = [f.path for f in os.scandir(path) if os.path.splitext(f)[1] == ".sqlite"]

def c():
    # Benchmark candidate: os.listdir filtered with str.endswith, joining
    # each matching name onto the working directory.
    path = os.getcwd()
    list_sqlite_files = [os.path.join(path, f) for f in os.listdir(path) if f.endswith(".sqlite")]

def d():
    # Benchmark candidate: glob.glob with a relative pattern.
    path = os.getcwd()
    # chdir into the directory we are already in (path comes from getcwd);
    # its cost is part of what gets timed.
    os.chdir(path)
    list_sqlite_files = [os.path.join(path, f) for f in glob.glob("*.sqlite")]

def e():
    # Benchmark candidate: glob.glob1(directory, pattern), whose results are
    # joined onto the directory here.
    # NOTE(review): glob1 is an undocumented helper; verify it is still
    # available on the Python version in use.
    path = os.getcwd()
    list_sqlite_files = [os.path.join(path, f) for f in glob.glob1(str(path), "*.sqlite")]

def f():
    # Benchmark candidate: os.walk restricted to the top-level directory.
    path = os.getcwd()
    list_sqlite_files = []
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(".sqlite"):
                list_sqlite_files.append( os.path.join(root, file) )
        # Stop after the first (top-level) directory: no recursion wanted.
        break



# Time 1000 calls of each candidate, in the same a..f order, and print the
# total seconds for each on its own line.
for candidate in (a, b, c, d, e, f):
    print(timeit.timeit(candidate, number=1000))

Results:

# Python 3.6.4
0.431
0.515
0.161
0.548
0.537
0.274
You need to login account before you can post.

About| Privacy statement| Terms of Service| Advertising| Contact us| Help| Sitemap|
Processed in 0.348573 second(s) , Gzip On .

© 2016 Powered by mzan.com design MATCHINFO