Module fileOps.fileOps

Expand source code
import os
import sys
import fileinput

import re
from re import findall

import json
from json import loads

def getLine(fname, lnumber):
    """Reads a single line of a file, addressed by its 1-based line number.

    The file is scanned sequentially from the top until the requested line is reached.

    Args:
        fname (str): path to the input file.
        lnumber (int): line number (the first line is 1).
    Returns:
        (str) string containing the contents of the lnumber'th line, including its
        line terminator if it has one.
    Raises:
        AssertionError: if lnumber is not positive, or if the file has fewer than
            lnumber lines.
    """
    # Raised explicitly instead of via `assert` so the checks survive `python -O`.
    if not lnumber > 0:
        raise AssertionError("Error: Invalid line number!")

    with open(fname, 'r') as fid:
        for i, line in enumerate(fid, start=1):
            if i == lnumber:
                return line

    # Falling out of the loop means the file ended before the requested line.
    raise AssertionError("Error: EOF reached!")

def addLineNumber(fin, fout):
    """Copies a file line by line, prefixing every line with its 1-based line number.

    Each output line has the form "<number> <original line>".

    Args:
        fin (str): path to the input file.
        fout (str): path to the output file (created or overwritten).
    Returns:
        None. The numbered copy of fin is written to fout as a side effect.
    """
    # Both handles live in one `with` so neither leaks if reading or writing fails.
    # The input is opened first so a missing input does not truncate an existing fout.
    with open(fin, 'r') as fin_id, open(fout, 'w') as fout_id:
        for i, line in enumerate(fin_id, start=1):
            fout_id.write('%d %s' % (i, line))

def addLineNumber_inplace(fins):
    """Rewrites each given file in place, prefixing every line with its line number.

    Numbering restarts at 1 for each file. Each rewritten line has the form
    "<number> <original line>".

    Args:
        fins (tuple of str): paths to the input files to be edited.
    Returns:
        None. Every file in fins is modified on disk as a side effect.
    """
    # With inplace=True, fileinput routes sys.stdout into the file being rewritten.
    # The context manager closes the stream even if a write fails, so stdout is not
    # left redirected and a later fileinput.input() call is not blocked.
    with fileinput.input(files=fins, inplace=True) as stream:
        for line in stream:
            sys.stdout.write('%d %s' % (stream.filelineno(), line))

def getLine_binarysearch(fname, lnumber):
    """Reads the line carrying a given line number, using a binary search over byte offsets.

    NOTE:
        the file must carry its own line numbers: every line has to start with its number
        followed by a space, and the numbers must be in ascending order.
        use addLineNumber or addLineNumber_inplace to create such a file.

    Args:
        fname (str): path to the input file that has line numbers.
        lnumber (int): line number.
    Returns:
        (str) string containing the contents of the lnumber'th line with the number prefix
        removed, or None if no line carries that number or a probed line has no readable
        line number.
    Raises:
        AssertionError: if lnumber is not positive.
    """
    # Raised explicitly instead of via `assert` so the check survives `python -O`.
    if not lnumber > 0:
        raise AssertionError("Error: Invalid line number!")

    # The file is opened in text mode only to learn the default text encoding. All seeking
    # and reading goes through the underlying binary buffer, where byte offsets are exact.
    with open(fname, 'r', errors='replace') as fid:
        raw = fid.buffer
        encoding = fid.encoding

        def line_from(offset):
            # Return the first whole line starting at or after `offset` ('' at EOF).
            # Stepping back one byte before skipping to the next line start means an
            # offset that is itself a line start yields that very line, so the first
            # line of the file is reachable.
            if offset > 0:
                raw.seek(offset - 1)
                raw.readline()
            else:
                raw.seek(0)
            return raw.readline().decode(encoding, errors='replace')

        def number_of(text):
            # Return the leading line number of `text`, or None if it has none.
            try:
                return int(text.split(None, 1)[0])
            except (IndexError, ValueError):
                return None

        # Bisect for the smallest offset whose following line is numbered >= lnumber.
        # Running off the end of the file also counts as "not smaller".
        left = 0
        right = raw.seek(0, os.SEEK_END)
        while left < right:
            mid = left + (right - left) // 2
            line = line_from(mid)
            if not line:
                right = mid
                continue

            ln = number_of(line)
            if ln is None:
                print("Error: line has no line number or EOF reached!")
                return None

            if ln < lnumber:
                left = mid + 1
            else:
                right = mid

        line = line_from(left)
        if number_of(line) != lnumber:
            return None

        sol = line.partition(" ")[2]
        # Text-mode reading would have translated "\r\n" to "\n"; mirror that here.
        if sol.endswith("\r\n"):
            sol = sol[:-2] + "\n"
        return sol

def findPattern(fin, pattern):
    """Reads a large file and finds lines that match some criteria.

    Args:
        fin (str): path to the input file.
        pattern (str): regex pattern to be matched (ex: r'<title.*>(.*)<\/title>' to search for titles in an xml file).
    Returns:
        (list) list of matches found.
    """
    matchlist = []
    with open(fin, 'r') as fid:
        for _, line in enumerate(fid, start=1):
            matches = re.findall(pattern, line)
            if len(matches) > 0:
                matchlist.extend(matches)

    if len(matchlist) == 0: print("EOF reached.")
    return matchlist

def matchToFile(fin, pattern, fout):
    """Copies to another file every line of the input that matches a regex.

    Args:
        fin (str): path to the input file.
        pattern (str): regex pattern to be matched (ex: r'<title.*>(.*)<\/title>' to search for titles in an xml file).
        fout (str): path to the output file (created or overwritten).
    Returns:
        None. fout is filled in with all the lines from fin that contain a match of the
        pattern (regex). Prints "EOF reached." when no line matched.
    """
    count = 0
    # Both handles live in one `with` so neither leaks if the regex or the I/O fails.
    # The input is opened first so a missing input does not truncate an existing fout.
    with open(fin, 'r') as fin_id, open(fout, 'w') as fout_id:
        for line in fin_id:
            # Only the existence of a match matters, so search() replaces findall().
            if re.search(pattern, line):
                fout_id.write(line)
                count += 1

    if count == 0:
        print("EOF reached.")

def jsonToDict(json_file):
    """Parses a JSON file and hands back the decoded data.

    Args:
        json_file (str): path to the input file.
    Returns:
        (dict) a dictionary containing the data from the input JSON file.
    """
    with open(json_file, 'r') as handle:
        return json.load(handle)

class loaded_json(object):
    """Class containing data loaded from an input JSON file.

    The top-level keys of the JSON object become attributes of the instance.
    Iterating over an instance yields those keys, in the same way iterating over
    a dict does.

    Usage: jsondata = loaded_json(file_path)
    """
    def __init__(self, json_file):
        """Loads json_file and exposes its top-level keys as attributes.

        Args:
            json_file (str): path to the input file.
        Raises:
            TypeError: if the top-level JSON value is not an object.
        """
        with open(json_file, 'r') as fid:
            data = json.loads(fid.read())

        # Only a JSON object can serve as the attribute dictionary; fail with a clear message.
        if not isinstance(data, dict):
            raise TypeError("Error: top-level JSON value must be an object, got %s"
                            % type(data).__name__)
        self.__dict__ = data

    def __iter__(self):
        """Iterates over the names of the loaded top-level keys."""
        return iter(self.__dict__)

Functions

def addLineNumber(fin, fout)

Reads a file and writes every line of the file to another file with line number added.

Args

fin : str
path to the input file.
fout : str
path to the output file.

Returns

Nothing is returned; fout is written as a copy of fin with line numbers added.

Expand source code
def addLineNumber(fin, fout):
    """Copies a file line by line, prefixing every line with its 1-based line number.

    Each output line has the form "<number> <original line>".

    Args:
        fin (str): path to the input file.
        fout (str): path to the output file (created or overwritten).
    Returns:
        None. The numbered copy of fin is written to fout as a side effect.
    """
    # Both handles live in one `with` so neither leaks if reading or writing fails.
    # The input is opened first so a missing input does not truncate an existing fout.
    with open(fin, 'r') as fin_id, open(fout, 'w') as fout_id:
        for i, line in enumerate(fin_id, start=1):
            fout_id.write('%d %s' % (i, line))
def addLineNumber_inplace(fins)

Reads a collection of files and adds line numbers to every line of those files in place.

Args

fins : tuple of str
paths to the input files to be edited.

Returns

Nothing is returned; every file in fins is rewritten in place with line numbers added to its lines.

Expand source code
def addLineNumber_inplace(fins):
    """Rewrites each given file in place, prefixing every line with its line number.

    Numbering restarts at 1 for each file. Each rewritten line has the form
    "<number> <original line>".

    Args:
        fins (tuple of str): paths to the input files to be edited.
    Returns:
        None. Every file in fins is modified on disk as a side effect.
    """
    # With inplace=True, fileinput routes sys.stdout into the file being rewritten.
    # The context manager closes the stream even if a write fails, so stdout is not
    # left redirected and a later fileinput.input() call is not blocked.
    with fileinput.input(files=fins, inplace=True) as stream:
        for line in stream:
            sys.stdout.write('%d %s' % (stream.filelineno(), line))
def findPattern(fin, pattern)

Reads a large file and finds lines that match some criteria.

Args

fin : str
path to the input file.
pattern : str
regex pattern to be matched (ex: r'<title.*>(.*)<\/title>' to search for titles in an xml file).

Returns

(list) list of matches found.

Expand source code
def findPattern(fin, pattern):
    """Reads a large file and finds lines that match some criteria.

    Args:
        fin (str): path to the input file.
        pattern (str): regex pattern to be matched (ex: r'<title.*>(.*)<\/title>' to search for titles in an xml file).
    Returns:
        (list) list of matches found.
    """
    matchlist = []
    with open(fin, 'r') as fid:
        for _, line in enumerate(fid, start=1):
            matches = re.findall(pattern, line)
            if len(matches) > 0:
                matchlist.extend(matches)

    if len(matchlist) == 0: print("EOF reached.")
    return matchlist
def getLine(fname, lnumber)

Jumps to a line number in a file and reads a line.

Args

fname : str
path to the input file.
lnumber : int
line number.

Returns

(str) string containing the contents of the lnumber'th line.

Expand source code
def getLine(fname, lnumber):
    """Reads a single line of a file, addressed by its 1-based line number.

    The file is scanned sequentially from the top until the requested line is reached.

    Args:
        fname (str): path to the input file.
        lnumber (int): line number (the first line is 1).
    Returns:
        (str) string containing the contents of the lnumber'th line, including its
        line terminator if it has one.
    Raises:
        AssertionError: if lnumber is not positive, or if the file has fewer than
            lnumber lines.
    """
    # Raised explicitly instead of via `assert` so the checks survive `python -O`.
    if not lnumber > 0:
        raise AssertionError("Error: Invalid line number!")

    with open(fname, 'r') as fid:
        for i, line in enumerate(fid, start=1):
            if i == lnumber:
                return line

    # Falling out of the loop means the file ended before the requested line.
    raise AssertionError("Error: EOF reached!")
def getLine_binarysearch(fname, lnumber)

Jumps to a line number in a file and reads that line. NOTE: this uses a binary search to find the line number, so the file must already contain line numbers; use addLineNumber or addLineNumber_inplace to create such a file.

Args

fname : str
path to the input file that has line numbers.
lnumber : int
line number.

Returns

(str) string containing the contents of the lnumber'th line.

Expand source code
def getLine_binarysearch(fname, lnumber):
    """Reads the line carrying a given line number, using a binary search over byte offsets.

    NOTE:
        the file must carry its own line numbers: every line has to start with its number
        followed by a space, and the numbers must be in ascending order.
        use addLineNumber or addLineNumber_inplace to create such a file.

    Args:
        fname (str): path to the input file that has line numbers.
        lnumber (int): line number.
    Returns:
        (str) string containing the contents of the lnumber'th line with the number prefix
        removed, or None if no line carries that number or a probed line has no readable
        line number.
    Raises:
        AssertionError: if lnumber is not positive.
    """
    # Raised explicitly instead of via `assert` so the check survives `python -O`.
    if not lnumber > 0:
        raise AssertionError("Error: Invalid line number!")

    # The file is opened in text mode only to learn the default text encoding. All seeking
    # and reading goes through the underlying binary buffer, where byte offsets are exact.
    with open(fname, 'r', errors='replace') as fid:
        raw = fid.buffer
        encoding = fid.encoding

        def line_from(offset):
            # Return the first whole line starting at or after `offset` ('' at EOF).
            # Stepping back one byte before skipping to the next line start means an
            # offset that is itself a line start yields that very line, so the first
            # line of the file is reachable.
            if offset > 0:
                raw.seek(offset - 1)
                raw.readline()
            else:
                raw.seek(0)
            return raw.readline().decode(encoding, errors='replace')

        def number_of(text):
            # Return the leading line number of `text`, or None if it has none.
            try:
                return int(text.split(None, 1)[0])
            except (IndexError, ValueError):
                return None

        # Bisect for the smallest offset whose following line is numbered >= lnumber.
        # Running off the end of the file also counts as "not smaller".
        left = 0
        right = raw.seek(0, os.SEEK_END)
        while left < right:
            mid = left + (right - left) // 2
            line = line_from(mid)
            if not line:
                right = mid
                continue

            ln = number_of(line)
            if ln is None:
                print("Error: line has no line number or EOF reached!")
                return None

            if ln < lnumber:
                left = mid + 1
            else:
                right = mid

        line = line_from(left)
        if number_of(line) != lnumber:
            return None

        sol = line.partition(" ")[2]
        # Text-mode reading would have translated "\r\n" to "\n"; mirror that here.
        if sol.endswith("\r\n"):
            sol = sol[:-2] + "\n"
        return sol
def jsonToDict(json_file)

Reads in a JSON file and converts it into a dictionary.

Args

json_file : str
path to the input file.

Returns

(dict) a dictionary containing the data from the input JSON file.

Expand source code
def jsonToDict(json_file):
    """Parses a JSON file and hands back the decoded data.

    Args:
        json_file (str): path to the input file.
    Returns:
        (dict) a dictionary containing the data from the input JSON file.
    """
    with open(json_file, 'r') as handle:
        return json.load(handle)
def matchToFile(fin, pattern, fout)

Finds lines that match some criteria and writes them to another file.

Args

fin : str
path to the input file.
pattern : str
regex pattern to be matched (ex: r'<title.*>(.*)<\/title>' to search for titles in an xml file).
fout : str
path to the output file.

Returns

Nothing is returned; fout is filled in with all the lines from fin that contain a match of the pattern (regex).

Expand source code
def matchToFile(fin, pattern, fout):
    """Copies to another file every line of the input that matches a regex.

    Args:
        fin (str): path to the input file.
        pattern (str): regex pattern to be matched (ex: r'<title.*>(.*)<\/title>' to search for titles in an xml file).
        fout (str): path to the output file (created or overwritten).
    Returns:
        None. fout is filled in with all the lines from fin that contain a match of the
        pattern (regex). Prints "EOF reached." when no line matched.
    """
    count = 0
    # Both handles live in one `with` so neither leaks if the regex or the I/O fails.
    # The input is opened first so a missing input does not truncate an existing fout.
    with open(fin, 'r') as fin_id, open(fout, 'w') as fout_id:
        for line in fin_id:
            # Only the existence of a match matters, so search() replaces findall().
            if re.search(pattern, line):
                fout_id.write(line)
                count += 1

    if count == 0:
        print("EOF reached.")

Classes

class loaded_json (json_file)

Class containing data loaded from an input JSON file.

Usage: jsondata = loaded_json(file_path)

TODO: make the class iterable.

Expand source code
class loaded_json(object):
    """Class containing data loaded from an input JSON file.

    The top-level keys of the JSON object become attributes of the instance.
    Iterating over an instance yields those keys, in the same way iterating over
    a dict does.

    Usage: jsondata = loaded_json(file_path)
    """
    def __init__(self, json_file):
        """Loads json_file and exposes its top-level keys as attributes.

        Args:
            json_file (str): path to the input file.
        Raises:
            TypeError: if the top-level JSON value is not an object.
        """
        with open(json_file, 'r') as fid:
            data = json.loads(fid.read())

        # Only a JSON object can serve as the attribute dictionary; fail with a clear message.
        if not isinstance(data, dict):
            raise TypeError("Error: top-level JSON value must be an object, got %s"
                            % type(data).__name__)
        self.__dict__ = data

    def __iter__(self):
        """Iterates over the names of the loaded top-level keys."""
        return iter(self.__dict__)